/*
 * Copyright (c) 2022 HiSilicon (Shanghai) Technologies CO., LIMITED.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <signal.h>
#include <pthread.h>
#include <sys/prctl.h>
#include <math.h>
#include <assert.h>

#include "hi_common.h"
#include "hi_comm_sys.h"
#include "hi_comm_svp.h"
#include "sample_comm_svp.h"
#include "hi_comm_ive.h"
#include "sample_svp_nnie_software.h"
#include "sample_media_ai.h"
#include "ai_infer_process.h"

#ifdef __cplusplus
#if __cplusplus
extern "C" {
#endif
#endif /* End of #ifdef __cplusplus */

/* Sleep interval between two NNIE query attempts while waiting for a forward pass to finish. */
#define USLEEP_TIME   100 // 100: usleep time, in microseconds

/* Named array indices 0..9, used instead of bare numeric subscripts (e.g. for af32Bias). */
#define ARRAY_SUBSCRIPT_0     0
#define ARRAY_SUBSCRIPT_1     1
#define ARRAY_SUBSCRIPT_2     2
#define ARRAY_SUBSCRIPT_3     3
#define ARRAY_SUBSCRIPT_4     4
#define ARRAY_SUBSCRIPT_5     5
#define ARRAY_SUBSCRIPT_6     6
#define ARRAY_SUBSCRIPT_7     7
#define ARRAY_SUBSCRIPT_8     8
#define ARRAY_SUBSCRIPT_9     9

/* Named subscript offsets 1..3. NOTE(review): not referenced in the visible part of this file. */
#define ARRAY_SUBSCRIPT_OFFSET_1    1
#define ARRAY_SUBSCRIPT_OFFSET_2    2
#define ARRAY_SUBSCRIPT_OFFSET_3    3

/* NOTE(review): presumably a minimum score threshold; its use is not visible here -- confirm. */
#define THRESH_MIN         0.25

/*
 * cnn parameter
 * Loaded model, NNIE (hardware) parameters and top-N software parameters of the CNN
 * classifier. Shared by CnnCreate, CnnCalU8c1Img and CnnDestroy.
 */
static SAMPLE_SVP_NNIE_MODEL_S g_stCnnModel = {0};
static SAMPLE_SVP_NNIE_PARAM_S g_stCnnNnieParam = {0};
static SAMPLE_SVP_NNIE_CNN_SOFTWARE_PARAM_S g_stCnnSoftwareParam = {0};

/*
 * yolov2 parameter
 * Loaded model, NNIE (hardware) parameters and post-processing parameters of the
 * Yolov2 detector. Set up by Yolo2Create.
 */
static SAMPLE_SVP_NNIE_MODEL_S g_stYolov2Model = {0};
static SAMPLE_SVP_NNIE_PARAM_S g_stYolov2NnieParam = {0};
static SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S g_stYolov2SoftwareParam = {0};

/*****/
/* retinaface parameter (NOTE(review): users are outside the visible part of this file) */
static SAMPLE_SVP_NNIE_MODEL_S g_stRetinaFaceModel = {0};
static SAMPLE_SVP_NNIE_PARAM_S g_stRetinaFaceNnieParam = {0};
/* Disabled: retinaface currently has no dedicated software-parameter object. */
//static SAMPLE_SVP_NNIE_RETINaFACE_SOFTWARE_PARAM_S g_stRetinaFaceSoftwareParam = {0};



/*****/
/* mobileface parameter (NOTE(review): users are outside the visible part of this file) */
static SAMPLE_SVP_NNIE_MODEL_S g_stMobileFaceModel = {0};
static SAMPLE_SVP_NNIE_PARAM_S g_stMobileFaceNnieParam = {0};

/* yolofastestXL parameter (NOTE(review): users are outside the visible part of this file) */
static SAMPLE_SVP_NNIE_MODEL_S g_styolofastestXLModel = {0};
static SAMPLE_SVP_NNIE_PARAM_S g_styolofastestXLNnieParam = {0};
static SAMPLE_SVP_NNIE_YOLOFASTESTXL_SOFTWARE_PARAM_S g_stYolofastestXLSoftwareParam = {0};

/*
 * Non-static (externally visible) tuning and result globals.
 * NOTE(review): none of these is referenced in the visible part of this file; the
 * meanings below are inferred from the names only -- confirm against their users.
 */
float threshold = 0.7;  /* presumably a score threshold -- TODO confirm */
HI_U32 isLog = 0;       /* presumably a logging switch -- TODO confirm */

float bbox_final[4];    /* four floats; presumably one bounding box -- TODO confirm layout */
static anchor_generator_t* anc_gen = NULL; /* starts NULL; created elsewhere in the file */
int IsDebugLog = 0;     /* presumably a debug-logging switch -- TODO confirm */
HI_S32 as32ResultDet[200 * 15] = { 0 }; /* 200 * 15 entries; presumably detection results -- TODO confirm */
HI_S32 u32ResultDetCnt = 0;             /* note: signed type despite the u32 prefix */
int IndexBuffer[512] = { 0 };
/*****/

/*
 * function : Cnn software parameter init
 *
 * Allocates one buffer through SAMPLE_COMM_SVP_MallocMem, zeroes it and splits it in two:
 *   - stGetTopN  : u32MaxInputNum frames of u32TopN result units each, with the frame
 *                  size rounded by SAMPLE_SVP_NNIE_ALIGN16 used as stride;
 *   - stAssistBuf: one result unit per class, placed right after the top-N area.
 * The class count is the width of destination node 0 of segment 0 of the model.
 *
 * pstNnieCfg         : supplies u32MaxInputNum.
 * pstCnnPara         : supplies the model description.
 * pstCnnSoftWarePara : u32TopN must already be set; stGetTopN/stAssistBuf are filled in.
 *
 * Returns the status of the allocation (HI_SUCCESS when it succeeded).
 */
static HI_S32 SampleSvpNnieCnnSoftwareParaInit(SAMPLE_SVP_NNIE_CFG_S* pstNnieCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstCnnPara, SAMPLE_SVP_NNIE_CNN_SOFTWARE_PARAM_S* pstCnnSoftWarePara)
{
    HI_U64 u64BasePhy = 0;
    HI_U8 *pu8BaseVir = NULL;
    HI_S32 s32Status;
    SVP_BLOB_S *pstTopN = &pstCnnSoftWarePara->stGetTopN;
    HI_U32 u32Classes = pstCnnPara->pstModel->astSeg[0].astDstNode[0].unShape.stWhc.u32Width;

    /* sizes of the two regions that share the allocation */
    HI_U32 u32FrameBytes = pstCnnSoftWarePara->u32TopN*sizeof(SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S);
    HI_U32 u32TopNBytes = SAMPLE_SVP_NNIE_ALIGN16(u32FrameBytes)*pstNnieCfg->u32MaxInputNum;
    HI_U32 u32AssistBytes = u32Classes*sizeof(SAMPLE_SVP_NNIE_CNN_GETTOPN_UNIT_S);
    HI_U32 u32AllBytes = u32TopNBytes + u32AssistBytes;

    /* one allocation backs both regions */
    s32Status = SAMPLE_COMM_SVP_MallocMem("SAMPLE_CNN_INIT", NULL, (HI_U64*)&u64BasePhy,
        (void**)&pu8BaseVir, u32AllBytes);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Status, s32Status, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,Malloc memory failed!\n");
    memset_s(pu8BaseVir, u32AllBytes, 0, u32AllBytes);

    /* top-N result blob: starts at the base of the allocation */
    pstTopN->u64PhyAddr = u64BasePhy;
    pstTopN->u64VirAddr = (HI_U64)(HI_UL)pu8BaseVir;
    pstTopN->u32Stride = SAMPLE_SVP_NNIE_ALIGN16(u32FrameBytes);
    pstTopN->u32Num = pstNnieCfg->u32MaxInputNum;
    pstTopN->unShape.stWhc.u32Width = u32FrameBytes / sizeof(HI_U32);
    pstTopN->unShape.stWhc.u32Height = 1;
    pstTopN->unShape.stWhc.u32Chn = 1;

    /* assist buffer: follows the top-N area */
    pstCnnSoftWarePara->stAssistBuf.u64PhyAddr = u64BasePhy + u32TopNBytes;
    pstCnnSoftWarePara->stAssistBuf.u64VirAddr = (HI_U64)(HI_UL)pu8BaseVir + u32TopNBytes;
    pstCnnSoftWarePara->stAssistBuf.u32Size = u32AssistBytes;

    return s32Status;
}

/*
 * function : Cnn software deinit
 *
 * Releases the buffer obtained by SampleSvpNnieCnnSoftwareParaInit. The buffer is
 * owned through stGetTopN; stAssistBuf points into the same allocation, so both
 * descriptors are cleared to avoid leaving dangling addresses behind.
 *
 * Returns HI_INVALID_VALUE when pstCnnSoftWarePara is NULL, HI_SUCCESS otherwise
 * (also when nothing was allocated).
 */
static HI_S32 SampleSvpNnieCnnSoftwareDeinit(SAMPLE_SVP_NNIE_CNN_SOFTWARE_PARAM_S* pstCnnSoftWarePara)
{
    HI_S32 s32Ret = HI_SUCCESS;
    SAMPLE_SVP_CHECK_EXPR_RET(pstCnnSoftWarePara == NULL, HI_INVALID_VALUE, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, pstCnnSoftWarePara can't be NULL!\n");
    if (pstCnnSoftWarePara->stGetTopN.u64PhyAddr != 0 && pstCnnSoftWarePara->stGetTopN.u64VirAddr != 0) {
        SAMPLE_SVP_MMZ_FREE(pstCnnSoftWarePara->stGetTopN.u64PhyAddr,
            pstCnnSoftWarePara->stGetTopN.u64VirAddr);
        pstCnnSoftWarePara->stGetTopN.u64PhyAddr = 0;
        pstCnnSoftWarePara->stGetTopN.u64VirAddr = 0;
        /* the assist buffer lived inside the allocation that was just freed */
        pstCnnSoftWarePara->stAssistBuf.u64PhyAddr = 0;
        pstCnnSoftWarePara->stAssistBuf.u64VirAddr = 0;
        pstCnnSoftWarePara->stAssistBuf.u32Size = 0;
    }
    return s32Ret;
}

/*
 * function : Cnn Deinit
 *
 * Tears down, in this order, the NNIE hardware parameters, the CNN software
 * parameters and the loaded model. Each argument may be NULL, in which case that
 * stage is skipped. Every stage is attempted even if an earlier one failed.
 *
 * Returns HI_SUCCESS when every executed stage succeeded; otherwise the status of
 * the FIRST stage that failed (a later success no longer hides an earlier error).
 */
static HI_S32 SampleSvpNnieCnnDeinit(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
    SAMPLE_SVP_NNIE_CNN_SOFTWARE_PARAM_S* pstSoftWareParam, SAMPLE_SVP_NNIE_MODEL_S* pstNnieModel)
{
    HI_S32 s32Ret = HI_SUCCESS;   /* first failure seen, reported to the caller */
    HI_S32 s32StepRet;            /* status of the stage currently executed */

    /* hardware para deinit */
    if (pstNnieParam != NULL) {
        s32StepRet = SAMPLE_COMM_SVP_NNIE_ParamDeinit(pstNnieParam);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SAMPLE_COMM_SVP_NNIE_ParamDeinit failed!\n");
        if (s32Ret == HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    /* software para deinit */
    if (pstSoftWareParam != NULL) {
        s32StepRet = SampleSvpNnieCnnSoftwareDeinit(pstSoftWareParam);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SampleSvpNnieCnnSoftwareDeinit failed!\n");
        if (s32Ret == HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    /* model deinit */
    if (pstNnieModel != NULL) {
        s32StepRet = SAMPLE_COMM_SVP_NNIE_UnloadModel(pstNnieModel);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SAMPLE_COMM_SVP_NNIE_UnloadModel failed!\n");
        if (s32Ret == HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    return s32Ret;
}

/*
 * function : Cnn init
 *
 * Initialises the NNIE hardware parameters and, when pstCnnSoftWarePara is not NULL,
 * the CNN software parameters. If either step fails, whatever was set up is torn
 * down again through SampleSvpNnieCnnDeinit (the model itself is left untouched).
 *
 * Returns HI_SUCCESS on success. On failure returns the status of the tear-down when
 * the tear-down itself failed, HI_FAILURE otherwise.
 */
static HI_S32 SampleSvpNnieCnnParamInit(SAMPLE_SVP_NNIE_CFG_S* pstNnieCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstCnnPara, SAMPLE_SVP_NNIE_CNN_SOFTWARE_PARAM_S* pstCnnSoftWarePara)
{
    HI_S32 s32Status;

    /* hardware side first */
    s32Status = SAMPLE_COMM_SVP_NNIE_ParamInit(pstNnieCfg, pstCnnPara);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Status, ROLL_BACK, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_COMM_SVP_NNIE_ParamInit failed!\n", s32Status);

    /* the software side is optional */
    if (pstCnnSoftWarePara == NULL) {
        return s32Status;
    }
    s32Status = SampleSvpNnieCnnSoftwareParaInit(pstNnieCfg, pstCnnPara, pstCnnSoftWarePara);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Status, ROLL_BACK, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SampleSvpNnieCnnSoftwareParaInit failed!\n", s32Status);
    return s32Status;

ROLL_BACK:
    /* undo the partial initialisation; NULL keeps the loaded model alive */
    s32Status = SampleSvpNnieCnnDeinit(pstCnnPara, pstCnnSoftWarePara, NULL);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Status, s32Status, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SampleSvpNnieCnnDeinit failed!\n", s32Status);
    return HI_FAILURE;
}

/*
 * Create the CNN model from a model file.
 *
 * Allocates a configuration object (one input image per batch, NNIE core 0, top-5
 * results), loads the model file into the file-scope CNN state, initialises the
 * hardware and software parameters and records the task buffer with the NNIE driver.
 *
 * model     : out; receives the new configuration on success, NULL on failure.
 *             On success the caller owns it and releases it with CnnDestroy.
 * modelFile : path of the model file to load.
 *
 * Returns 0 on success, -1 on failure. On failure everything set up so far is torn
 * down, including the configuration object allocated here.
 */
int CnnCreate(SAMPLE_SVP_NNIE_CFG_S **model, const char* modelFile)
{
    SAMPLE_SVP_NNIE_CFG_S *self;
    HI_U32 u32PicNum = 1;
    HI_S32 s32Ret;

    self = (SAMPLE_SVP_NNIE_CFG_S*)malloc(sizeof(*self));
    HI_ASSERT(self);
    if (memset_s(self, sizeof(*self), 0x00, sizeof(*self)) != EOK) {
        HI_ASSERT(0);
    }

    // Set configuration parameter
    self->pszPic = NULL;
    self->u32MaxInputNum = u32PicNum; // max input image num in each batch
    self->u32MaxRoiNum = 0;
    self->aenNnieCoreId[0] = SVP_NNIE_ID_0; // set NNIE core
    g_stCnnSoftwareParam.u32TopN = 5; // 5: value of the u32TopN

    // Sys init
    // CNN Load model
    SAMPLE_SVP_TRACE_INFO("Cnn Load model!\n");
    s32Ret = SAMPLE_COMM_SVP_NNIE_LoadModel((char*)modelFile, &g_stCnnModel);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, CNN_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_COMM_SVP_NNIE_LoadModel failed!\n");

    // CNN parameter initialization
    // Cnn software parameters are set in SampleSvpNnieCnnSoftwareParaInit,
    // if user has changed net struct, please make sure the parameter settings in
    // SampleSvpNnieCnnSoftwareParaInit function are correct
    SAMPLE_SVP_TRACE_INFO("Cnn parameter initialization!\n");
    g_stCnnNnieParam.pstModel = &g_stCnnModel.stModel;
    s32Ret = SampleSvpNnieCnnParamInit(self, &g_stCnnNnieParam, &g_stCnnSoftwareParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, CNN_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SampleSvpNnieCnnParamInit failed!\n");

    // Model key information
    SAMPLE_PRT("model={ type=%x, frmNum=%u, chnNum=%u, w=%u, h=%u, stride=%u }\n",
        g_stCnnNnieParam.astSegData[0].astSrc[0].enType,
        g_stCnnNnieParam.astSegData[0].astSrc[0].u32Num,
        g_stCnnNnieParam.astSegData[0].astSrc[0].unShape.stWhc.u32Chn,
        g_stCnnNnieParam.astSegData[0].astSrc[0].unShape.stWhc.u32Width,
        g_stCnnNnieParam.astSegData[0].astSrc[0].unShape.stWhc.u32Height,
        g_stCnnNnieParam.astSegData[0].astSrc[0].u32Stride);

    // record tskBuf
    s32Ret = HI_MPI_SVP_NNIE_AddTskBuf(&(g_stCnnNnieParam.astForwardCtrl[0].stTskBuf));
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, CNN_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,HI_MPI_SVP_NNIE_AddTskBuf failed!\n");
    *model = self;
    return 0;

CNN_FAIL_0:
    SampleSvpNnieCnnDeinit(&g_stCnnNnieParam, &g_stCnnSoftwareParam, &g_stCnnModel);
    // The configuration is never handed to the caller on failure, so it must be
    // released here; otherwise every failed create leaks it.
    free(self);
    *model = NULL;
    return -1;
}

/*
 * Destroy the CNN model created by CnnCreate.
 *
 * Removes the recorded task buffer from the NNIE driver, tears down the file-scope
 * CNN state (hardware parameters, software parameters, model) and frees the
 * configuration object. A failure to remove the task buffer is only logged; the
 * tear-down is carried out either way.
 */
void CnnDestroy(SAMPLE_SVP_NNIE_CFG_S *self)
{
    HI_S32 s32Status = HI_MPI_SVP_NNIE_RemoveTskBuf(&(g_stCnnNnieParam.astForwardCtrl[0].stTskBuf));

    /* the jump target directly follows: this only reports the error */
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Status, CNN_RELEASE, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,HI_MPI_SVP_NNIE_RemoveTskBuf failed!\n");

CNN_RELEASE:
    SampleSvpNnieCnnDeinit(&g_stCnnNnieParam, &g_stCnnSoftwareParam, &g_stCnnModel);
    free(self);
}

/*
 * Copy an image into source blob astSrc[nodeId] of segment segId, row by row, and
 * flush the cache over the blob afterwards.
 *
 *   - YVU420SP blobs: u32Height rows are taken from plane 0 of the image, then
 *     u32Height / 2 rows from plane 1.
 *   - YVU422SP and SEQ_S32 blobs are not supported (HI_ASSERT(0)).
 *   - every other type: for each channel i, u32Height rows are taken from image plane i.
 *
 * Each row copies u32Width elements; an element is one byte for blob types between
 * SVP_BLOB_TYPE_U8 and SVP_BLOB_TYPE_YVU422SP, sizeof(HI_U32) bytes otherwise.
 * pstNnieCfg is not used. Always returns HI_SUCCESS.
 */
static HI_S32 FillNnieByImg(SAMPLE_SVP_NNIE_CFG_S* pstNnieCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, int segId, int nodeId, const IVE_IMAGE_S *img)
{
    SVP_BLOB_S *pstBlob = &pstNnieParam->astSegData[segId].astSrc[nodeId];
    const uint8_t *srcData = NULL;
    HI_U8 *pu8Dst = NULL;
    HI_U32 u32ElemBytes;
    HI_U32 u32RowBytes;
    HI_U32 u32Rows;
    HI_U32 u32Planes;
    HI_U32 u32DstStride;
    HI_U32 frame;
    HI_U32 plane;
    HI_U32 row;

    /* element size depends on the blob type */
    u32ElemBytes = (SVP_BLOB_TYPE_U8 <= pstBlob->enType && SVP_BLOB_TYPE_YVU422SP >= pstBlob->enType) ?
        sizeof(HI_U8) : sizeof(HI_U32);

    /* sequence blobs cannot be filled from an image */
    if (SVP_BLOB_TYPE_SEQ_S32 == pstBlob->enType) {
        HI_ASSERT(0);
        return HI_SUCCESS;
    }

    u32Rows = pstBlob->unShape.stWhc.u32Height;
    u32Planes = pstBlob->unShape.stWhc.u32Chn;
    u32DstStride = pstBlob->u32Stride;
    u32RowBytes = pstBlob->unShape.stWhc.u32Width * u32ElemBytes;
    pu8Dst = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U8, pstBlob->u64VirAddr);

    switch (pstBlob->enType) {
        case SVP_BLOB_TYPE_YVU420SP:
            HI_ASSERT(pstNnieParam->astSegData[segId].astSrc[nodeId].u32Num == 1);
            for (frame = 0; frame < pstBlob->u32Num; frame++) {
                /* luma plane: full height */
                srcData = (const uint8_t*)(uintptr_t)img->au64VirAddr[0];
                HI_ASSERT(srcData);
                for (row = 0; row < u32Rows; row++) {
                    if (memcpy_s(pu8Dst, u32RowBytes, srcData, u32RowBytes) != EOK) {
                        HI_ASSERT(0);
                    }
                    pu8Dst += u32DstStride;
                    srcData += img->au32Stride[0];
                }
                /* interleaved chroma plane: half height */
                srcData = (const uint8_t*)(uintptr_t)img->au64VirAddr[1];
                HI_ASSERT(srcData);
                for (row = 0; row < u32Rows / 2; row++) { // 2: 1/2Height
                    if (memcpy_s(pu8Dst, u32RowBytes, srcData, u32RowBytes) != EOK) {
                        HI_ASSERT(0);
                    }
                    pu8Dst += u32DstStride;
                    srcData += img->au32Stride[1];
                }
            }
            break;
        case SVP_BLOB_TYPE_YVU422SP:
            HI_ASSERT(0);
            break;
        default:
            /* planar layout: one image plane per blob channel */
            for (frame = 0; frame < pstBlob->u32Num; frame++) {
                for (plane = 0; plane < u32Planes; plane++) {
                    srcData = (const uint8_t*)(uintptr_t)img->au64VirAddr[plane];
                    HI_ASSERT(srcData);
                    for (row = 0; row < u32Rows; row++) {
                        if (memcpy_s(pu8Dst, u32RowBytes, srcData, u32RowBytes) != EOK) {
                            HI_ASSERT(0);
                        }
                        pu8Dst += u32DstStride;
                        srcData += img->au32Stride[plane];
                    }
                }
            }
            break;
    }

    /* flush the cache over the blob after the CPU copy */
    SAMPLE_COMM_SVP_FlushCache(pstBlob->u64PhyAddr,
        SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstBlob->u64VirAddr),
        pstBlob->u32Num*u32Planes*u32Rows*u32DstStride);

    return HI_SUCCESS;
}

/*
 * Copy the top-N classification result of the first frame out of the top-N blob.
 *
 * The blob holds consecutive (class id, score) pairs of HI_U32 values; pair k is
 * copied to resBuf[k].num / resBuf[k].score.
 *
 * pstGetTopN : blob holding the top-N result; must not be NULL.
 * u32TopN    : number of pairs stored in the blob.
 * resBuf     : output array; cleared before being filled.
 * resSize    : capacity of resBuf in elements.
 * resLen     : out; number of entries written, i.e. min(u32TopN, resSize);
 *              must not be NULL.
 *
 * With a NULL resBuf, a non-positive resSize or a blob without a virtual address,
 * nothing is copied and *resLen is set to 0.
 */
void CnnFetchRes(SVP_BLOB_S *pstGetTopN, HI_U32 u32TopN, RecogNumInfo resBuf[], int resSize, int* resLen)
{
    HI_ASSERT(pstGetTopN);
    HI_ASSERT(resLen);

    *resLen = 0;
    /* a negative resSize would turn into a huge size_t below; 0 leaves nothing to fill */
    if (resBuf == NULL || resSize <= 0) {
        return;
    }

    size_t bufBytes = (size_t)resSize * sizeof(resBuf[0]);
    if (memset_s(resBuf, bufBytes, 0x00, bufBytes) != EOK) {
        HI_ASSERT(0);
    }

    /* only the first frame (offset 0 of the blob) is read */
    const HI_U32 *pu32Pairs = (const HI_U32*)((HI_UL)pstGetTopN->u64VirAddr);
    if (pu32Pairs == NULL) {
        return;
    }

    int resId = 0;
    /* count pairs rather than words so that u32TopN * 2 cannot wrap around */
    for (HI_U32 pair = 0; pair < u32TopN && resId < resSize; pair++, resId++) {
        resBuf[resId].num = pu32Pairs[2 * pair];          // 2: two words per pair
        resBuf[resId].score = pu32Pairs[2 * pair + 1];    // 2: two words per pair
    }
    *resLen = resId;
}

/* function : NNIE Forward */
static HI_S32 SAMPLE_SVP_NNIE_Forward(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
    SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S* pstInputDataIdx,
    SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S* pstProcSegIdx, HI_BOOL bInstant)
{
    HI_S32 s32Ret = HI_SUCCESS;
    HI_U32 i;
    HI_U32 j;
    HI_BOOL bFinish = HI_FALSE;
    SVP_NNIE_HANDLE hSvpNnieHandle = 0;
    HI_U32 u32TotalStepNum = 0;

    SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u64PhyAddr,
        SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID,
        pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u64VirAddr),
        pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u32Size);

    for (i = 0; i < pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].u32DstNum; i++) {
        if (pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].enType == SVP_BLOB_TYPE_SEQ_S32) {
            for (j = 0; j < pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num; j++) {
                u32TotalStepNum += *(SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32,
                    pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stSeq.u64VirAddrStep) + j);
            }
            SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,
                SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID,
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr),
                u32TotalStepNum*pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);
        } else {
            SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,
                SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID,
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr),
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num*
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Chn*
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Height*
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);
        }
    }

    /* set input blob according to node name */
    if (pstInputDataIdx->u32SegIdx != pstProcSegIdx->u32SegIdx) {
        for (i = 0; i < pstNnieParam->pstModel->astSeg[pstProcSegIdx->u32SegIdx].u16SrcNum; i++) {
            for (j = 0; j < pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].u16DstNum; j++) {
                if (strncmp(pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].astDstNode[j].szName,
                    pstNnieParam->pstModel->astSeg[pstProcSegIdx->u32SegIdx].astSrcNode[i].szName,
                    SVP_NNIE_NODE_NAME_LEN) == 0) {
                    pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astSrc[i] =
                        pstNnieParam->astSegData[pstInputDataIdx->u32SegIdx].astDst[j];
                    break;
                }
            }
            SAMPLE_SVP_CHECK_EXPR_RET((j == pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].u16DstNum),
                HI_FAILURE, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,can't find %d-th seg's %d-th src blob!\n",
                pstProcSegIdx->u32SegIdx, i);
        }
    }

    /* NNIE_Forward */
    s32Ret = HI_MPI_SVP_NNIE_Forward(&hSvpNnieHandle,
        pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astSrc,
        pstNnieParam->pstModel, pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst,
        &pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx], bInstant);
    SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,HI_MPI_SVP_NNIE_Forward failed!\n");

    if (bInstant) {
        /* Wait NNIE finish */
        while (HI_ERR_SVP_NNIE_QUERY_TIMEOUT == (s32Ret =
            HI_MPI_SVP_NNIE_Query(pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].enNnieId,
            hSvpNnieHandle, &bFinish, HI_TRUE))) {
            usleep(USLEEP_TIME);
            SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_INFO,
                "HI_MPI_SVP_NNIE_Query Query timeout!\n");
        }
    }
    u32TotalStepNum = 0;

    for (i = 0; i < pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].u32DstNum; i++) {
        if (SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].enType) {
            for (j = 0; j < pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num; j++) {
                u32TotalStepNum += *(SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_U32,
                    pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stSeq.u64VirAddrStep) + j);
            }
            SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,
                SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID,
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr),
                u32TotalStepNum*pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);
        } else {
            SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,
                SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_VOID,
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr),
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num*
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Chn*
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Height*
                pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);
        }
    }

    return s32Ret;
}

/*
 * Calculate a U8C1 image
 *
 * Runs the CNN on one image: copies the image into source blob 0 of segment 0,
 * forwards segment 0 on the NNIE (waiting for completion), computes the top-N
 * classes in software and copies them into resBuf.
 *
 * self    : configuration created by CnnCreate; its pszPic is reset to NULL.
 * img     : input image.
 * resBuf  : output array of recognition results.
 * resSize : capacity of resBuf in elements.
 * resLen  : out; number of results written (only updated on success).
 *
 * Returns 0 on success, -1 when any stage failed.
 */
int CnnCalU8c1Img(SAMPLE_SVP_NNIE_CFG_S* self,
    const IVE_IMAGE_S *img, RecogNumInfo resBuf[], int resSize, int* resLen)
{
    SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S stInIdx = { .u32SegIdx = 0, .u32NodeIdx = 0 };
    SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S stSegIdx = { .u32SegIdx = 0 };
    HI_S32 s32Status;

    self->pszPic = NULL;

    /* stage 1: fill src data */
    s32Status = FillNnieByImg(self, &g_stCnnNnieParam, 0, 0, img);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Status, CNN_CAL_FAIL, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_FillSrcData failed!\n");

    /* stage 2: NNIE process (the 0-th segment), blocking until it is done */
    s32Status = SAMPLE_SVP_NNIE_Forward(&g_stCnnNnieParam, &stInIdx, &stSegIdx, HI_TRUE);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Status, CNN_CAL_FAIL, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_Forward failed!\n");

    /* stage 3: software top-N; if the net structure was changed, make sure the
       inputs of SAMPLE_SVP_NNIE_Cnn_GetTopN are still correct */
    s32Status = SAMPLE_SVP_NNIE_Cnn_GetTopN(&g_stCnnNnieParam, &g_stCnnSoftwareParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Status, CNN_CAL_FAIL, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_CnnGetTopN failed!\n");

    /* stage 4: hand the result to the caller */
    CnnFetchRes(&g_stCnnSoftwareParam.stGetTopN, g_stCnnSoftwareParam.u32TopN, resBuf, resSize, resLen);
    return 0;

CNN_CAL_FAIL:
    return -1;
}

/*
 * function : Yolov2 software para init
 *
 * Fills in the hard-coded Yolov2 post-processing settings (original image size taken
 * from source blob 0 of segment 0, 5 boxes per grid cell, 1 class, 20 x 12 grid,
 * thresholds, at most 10 ROIs, 10 anchor biases) and allocates one cached buffer
 * that is laid out as
 *     [ result tmp buffer | dst ROI | dst score | per-class ROI count ]
 * The three result blobs are described with (class number + 1) classes.
 * pstCfg is not used.
 *
 * Returns the status of the allocation (HI_SUCCESS when it succeeded).
 */
static HI_S32 SampleSvpNnieYolov2SoftwareInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S* pstSoftWareParam)
{
    /* anchor biases, stored to af32Bias[0..9] in this order */
    static const double s_adBias[] = { 0.52, 0.61, 1.05, 1.12, 1.85, 2.05, 4.63, 4.49, 7.15, 7.56 };
    SVP_BLOB_S *pstRoi = &pstSoftWareParam->stDstRoi;
    SVP_BLOB_S *pstScore = &pstSoftWareParam->stDstScore;
    SVP_BLOB_S *pstCount = &pstSoftWareParam->stClassRoiNum;
    HI_U64 u64BasePhy = 0;
    HI_U8 *pu8BaseVir = NULL;
    HI_U32 u32Classes;
    HI_U32 u32Boxes;
    HI_U32 u32TmpBytes;
    HI_U32 u32RoiBytes;
    HI_U32 u32ScoreBytes;
    HI_U32 u32CountBytes;
    HI_U32 u32AllBytes;
    HI_U32 u32Idx;
    HI_S32 s32Status;

    /* fixed network description */
    pstSoftWareParam->u32OriImHeight = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Height;
    pstSoftWareParam->u32OriImWidth = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Width;
    pstSoftWareParam->u32BboxNumEachGrid = 5; // 5: boxes per grid cell
    pstSoftWareParam->u32ClassNum = 1; // 1: class number
    pstSoftWareParam->u32GridNumHeight = 12; // 12: GridNumHeight
    pstSoftWareParam->u32GridNumWidth = 20; // 20: GridNumWidth
    pstSoftWareParam->u32NmsThresh = (HI_U32)(0.3f*SAMPLE_SVP_NNIE_QUANT_BASE);
    pstSoftWareParam->u32ConfThresh = (HI_U32)(0.25f*SAMPLE_SVP_NNIE_QUANT_BASE);
    pstSoftWareParam->u32MaxRoiNum = 10;  // 10: MaxRoiNum
    for (u32Idx = 0; u32Idx < sizeof(s_adBias) / sizeof(s_adBias[0]); u32Idx++) {
        pstSoftWareParam->af32Bias[u32Idx] = s_adBias[u32Idx];
    }

    /* sizes of the four regions of the assist buffer */
    u32Classes = pstSoftWareParam->u32ClassNum + 1;
    u32Boxes = pstSoftWareParam->u32BboxNumEachGrid*pstSoftWareParam->u32GridNumHeight*
        pstSoftWareParam->u32GridNumWidth;
    u32TmpBytes = SAMPLE_SVP_NNIE_Yolov2_GetResultTmpBuf(pstSoftWareParam);
    u32RoiBytes = SAMPLE_SVP_NNIE_ALIGN16(u32Classes * u32Boxes * sizeof(HI_U32) * SAMPLE_SVP_NNIE_COORDI_NUM);
    u32ScoreBytes = SAMPLE_SVP_NNIE_ALIGN16(u32Classes * u32Boxes * sizeof(HI_U32));
    u32CountBytes = SAMPLE_SVP_NNIE_ALIGN16(u32Classes * sizeof(HI_U32));
    u32AllBytes = u32RoiBytes + u32ScoreBytes + u32CountBytes + u32TmpBytes;

    /* one cached allocation, zeroed and flushed */
    s32Status = SAMPLE_COMM_SVP_MallocCached("SAMPLE_YOLOV2_INIT", NULL, (HI_U64*)&u64BasePhy,
        (void**)&pu8BaseVir, u32AllBytes);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Status, s32Status, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,Malloc memory failed!\n");
    memset_s(pu8BaseVir, u32AllBytes, 0, u32AllBytes);
    SAMPLE_COMM_SVP_FlushCache(u64BasePhy, (void*)pu8BaseVir, u32AllBytes);

    /* region 0: tmp buffer, at the base of the allocation */
    pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr = u64BasePhy;
    pstSoftWareParam->stGetResultTmpBuf.u64VirAddr = (HI_U64)((HI_UL)pu8BaseVir);

    /* region 1: ROI coordinates */
    pstRoi->enType = SVP_BLOB_TYPE_S32;
    pstRoi->u64PhyAddr = u64BasePhy + u32TmpBytes;
    pstRoi->u64VirAddr = (HI_U64)((HI_UL)pu8BaseVir + u32TmpBytes);
    pstRoi->u32Stride = u32RoiBytes;
    pstRoi->u32Num = 1;
    pstRoi->unShape.stWhc.u32Chn = 1;
    pstRoi->unShape.stWhc.u32Height = 1;
    pstRoi->unShape.stWhc.u32Width = u32Classes *
        u32Boxes*SAMPLE_SVP_NNIE_COORDI_NUM;

    /* region 2: scores */
    pstScore->enType = SVP_BLOB_TYPE_S32;
    pstScore->u64PhyAddr = u64BasePhy + u32TmpBytes + u32RoiBytes;
    pstScore->u64VirAddr = (HI_U64)((HI_UL)pu8BaseVir + u32TmpBytes + u32RoiBytes);
    pstScore->u32Stride = u32ScoreBytes;
    pstScore->u32Num = 1;
    pstScore->unShape.stWhc.u32Chn = 1;
    pstScore->unShape.stWhc.u32Height = 1;
    pstScore->unShape.stWhc.u32Width = u32Classes*u32Boxes;

    /* region 3: ROI count per class */
    pstCount->enType = SVP_BLOB_TYPE_S32;
    pstCount->u64PhyAddr = u64BasePhy + u32TmpBytes +
        u32RoiBytes + u32ScoreBytes;
    pstCount->u64VirAddr = (HI_U64)((HI_UL)pu8BaseVir + u32TmpBytes +
        u32RoiBytes + u32ScoreBytes);
    pstCount->u32Stride = u32CountBytes;
    pstCount->u32Num = 1;
    pstCount->unShape.stWhc.u32Chn = 1;
    pstCount->unShape.stWhc.u32Height = 1;
    pstCount->unShape.stWhc.u32Width = u32Classes;

    return s32Status;
}

/*
 * function : Yolov2 software deinit
 *
 * Frees the assist buffer owned through stGetResultTmpBuf and clears the addresses
 * of the tmp buffer and of the three result blobs that live inside it.
 *
 * Returns HI_INVALID_VALUE when pstSoftWareParam is NULL, HI_SUCCESS otherwise
 * (also when there is nothing to free).
 */
static HI_S32 SampleSvpNnieYolov2SoftwareDeinit(SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S* pstSoftWareParam)
{
    SAMPLE_SVP_CHECK_EXPR_RET(pstSoftWareParam == NULL, HI_INVALID_VALUE, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, pstSoftWareParam can't be NULL!\n");

    /* nothing was allocated (or it was released already) */
    if (pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr == 0 || pstSoftWareParam->stGetResultTmpBuf.u64VirAddr == 0) {
        return HI_SUCCESS;
    }

    SAMPLE_SVP_MMZ_FREE(pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr,
        pstSoftWareParam->stGetResultTmpBuf.u64VirAddr);

    /* every descriptor below pointed into the buffer that was just freed */
    pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr = 0;
    pstSoftWareParam->stGetResultTmpBuf.u64VirAddr = 0;
    pstSoftWareParam->stDstRoi.u64PhyAddr = 0;
    pstSoftWareParam->stDstRoi.u64VirAddr = 0;
    pstSoftWareParam->stDstScore.u64PhyAddr = 0;
    pstSoftWareParam->stDstScore.u64VirAddr = 0;
    pstSoftWareParam->stClassRoiNum.u64PhyAddr = 0;
    pstSoftWareParam->stClassRoiNum.u64VirAddr = 0;

    return HI_SUCCESS;
}

/*
 * function : Yolov2 Deinit
 *
 * Tears down, in this order, the NNIE hardware parameters, the Yolov2 software
 * parameters and the loaded model. Each argument may be NULL, in which case that
 * stage is skipped. Every stage is attempted even if an earlier one failed.
 *
 * Returns HI_SUCCESS when every executed stage succeeded; otherwise the status of
 * the FIRST stage that failed (a later success no longer hides an earlier error).
 */
static HI_S32 SampleSvpNnieYolov2Deinit(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
    SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S* pstSoftWareParam, SAMPLE_SVP_NNIE_MODEL_S *pstNnieModel)
{
    HI_S32 s32Ret = HI_SUCCESS;   /* first failure seen, reported to the caller */
    HI_S32 s32StepRet;            /* status of the stage currently executed */

    /* hardware deinit */
    if (pstNnieParam != NULL) {
        s32StepRet = SAMPLE_COMM_SVP_NNIE_ParamDeinit(pstNnieParam);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SAMPLE_COMM_SVP_NNIE_ParamDeinit failed!\n");
        if (s32Ret == HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    /* software deinit */
    if (pstSoftWareParam != NULL) {
        s32StepRet = SampleSvpNnieYolov2SoftwareDeinit(pstSoftWareParam);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SampleSvpNnieYolov2SoftwareDeinit failed!\n");
        if (s32Ret == HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    /* model deinit */
    if (pstNnieModel != NULL) {
        s32StepRet = SAMPLE_COMM_SVP_NNIE_UnloadModel(pstNnieModel);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SAMPLE_COMM_SVP_NNIE_UnloadModel failed!\n");
        if (s32Ret == HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    return s32Ret;
}

/*
 * function : Yolov2 init
 *
 * Initialises the NNIE hardware parameters and then the Yolov2 software parameters.
 * If either step fails, whatever was set up is torn down again through
 * SampleSvpNnieYolov2Deinit (the model itself is left untouched).
 *
 * Returns HI_SUCCESS on success. On failure returns the status of the tear-down when
 * the tear-down itself failed, HI_FAILURE otherwise.
 */
static HI_S32 SampleSvpNnieYolov2ParamInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_YOLOV2_SOFTWARE_PARAM_S* pstSoftWareParam)
{
    HI_S32 s32Ret;
    /* init hardware para */
    s32Ret = SAMPLE_COMM_SVP_NNIE_ParamInit(pstCfg, pstNnieParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, INIT_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_COMM_SVP_NNIE_ParamInit failed!\n", s32Ret);

    /* init software para */
    s32Ret = SampleSvpNnieYolov2SoftwareInit(pstCfg, pstNnieParam,
        pstSoftWareParam);
    /* the log names the function that was actually called (was "Yolov1") */
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, INIT_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SampleSvpNnieYolov2SoftwareInit failed!\n", s32Ret);
    return s32Ret;
INIT_FAIL_0:
    /* undo the partial initialisation; NULL keeps the loaded model alive */
    s32Ret = SampleSvpNnieYolov2Deinit(pstNnieParam, pstSoftWareParam, NULL);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SampleSvpNnieYolov2Deinit failed!\n", s32Ret);
    return HI_FAILURE;
}

/* function : create yolo2 model based on model file */
int Yolo2Create(SAMPLE_SVP_NNIE_CFG_S **model, const char* modelFile)
{
    SAMPLE_SVP_NNIE_CFG_S *self;
    HI_U32 u32PicNum = 1;
    HI_S32 s32Ret;

    self = (SAMPLE_SVP_NNIE_CFG_S*)malloc(sizeof(*self));
    HI_ASSERT(self);
    memset_s(self, sizeof(*self), 0x00, sizeof(*self));

    // Set configuration parameter
    self->pszPic = NULL;
    self->u32MaxInputNum = u32PicNum; // max input image num in each batch
    self->u32MaxRoiNum = 0;
    self->aenNnieCoreId[0] = SVP_NNIE_ID_0; // set NNIE core

    // Yolov2 Load model
    SAMPLE_SVP_TRACE_INFO("Yolov2 Load model!\n");
    s32Ret = SAMPLE_COMM_SVP_NNIE_LoadModel((char*)modelFile, &g_stYolov2Model);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, YOLOV2_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, SAMPLE_COMM_SVP_NNIE_LoadModel failed!\n");

    /* Yolov2 parameter initialization */
    /* Yolov2 software parameters are set in SampleSvpNnieYolov2SoftwareInit,
      if user has changed net struct, please make sure the parameter settings in
      SampleSvpNnieYolov2SoftwareInit function are correct */
    SAMPLE_SVP_TRACE_INFO("Yolov2 parameter initialization!\n");
    g_stYolov2NnieParam.pstModel = &g_stYolov2Model.stModel;
    s32Ret = SampleSvpNnieYolov2ParamInit(self, &g_stYolov2NnieParam, &g_stYolov2SoftwareParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, YOLOV2_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SampleSvpNnieYolov2ParamInit failed!\n");

    // model important info
    SAMPLE_PRT("model.base={ type=%x, frmNum=%u, chnNum=%u, w=%u, h=%u, stride=%u }\n",
        g_stYolov2NnieParam.astSegData[0].astSrc[0].enType,
        g_stYolov2NnieParam.astSegData[0].astSrc[0].u32Num,
        g_stYolov2NnieParam.astSegData[0].astSrc[0].unShape.stWhc.u32Chn,
        g_stYolov2NnieParam.astSegData[0].astSrc[0].unShape.stWhc.u32Width,
        g_stYolov2NnieParam.astSegData[0].astSrc[0].unShape.stWhc.u32Height,
        g_stYolov2NnieParam.astSegData[0].astSrc[0].u32Stride);
    SAMPLE_PRT("model.soft={ class=%u, ori.w=%u, ori.h=%u, bnum=%u, \
        grid.w=%u, grid.h=%u, nmsThresh=%u, confThresh=%u, u32MaxRoiNum=%u }\n",
        g_stYolov2SoftwareParam.u32ClassNum,
        g_stYolov2SoftwareParam.u32OriImWidth,
        g_stYolov2SoftwareParam.u32OriImHeight,
        g_stYolov2SoftwareParam.u32BboxNumEachGrid,
        g_stYolov2SoftwareParam.u32GridNumWidth,
        g_stYolov2SoftwareParam.u32GridNumHeight,
        g_stYolov2SoftwareParam.u32NmsThresh,
        g_stYolov2SoftwareParam.u32ConfThresh,
        g_stYolov2SoftwareParam.u32MaxRoiNum);

    *model = self;
    return 0;

    YOLOV2_FAIL_0:
        SAMPLE_PRT("Yolo2Create SampleSvpNnieYolov2Deinit\n");
        SampleSvpNnieYolov2Deinit(&g_stYolov2NnieParam, &g_stYolov2SoftwareParam, &g_stYolov2Model);
        *model = NULL;
        return -1;
}

/*
 * function : destroy yolo2 model
 * Deinitializes the global Yolov2 NNIE parameters, software parameters and
 * model through SampleSvpNnieYolov2Deinit, calls SAMPLE_COMM_SVP_CheckSysExit,
 * and finally frees the configuration object passed in.
 *
 * self : configuration object to free; the deinit result is not checked.
 */
void Yolo2Destory(SAMPLE_SVP_NNIE_CFG_S *self)
{
    SampleSvpNnieYolov2Deinit(&g_stYolov2NnieParam, &g_stYolov2SoftwareParam, &g_stYolov2Model);
    SAMPLE_COMM_SVP_CheckSysExit();
    free(self);
}

/*
 * function : fetch result
 * Copies the Yolov2 detections of class index 1 and above out of the result
 * blobs into resBuf.
 *
 * pstDstScore    : blob holding the scores as HI_S32 values; each is divided
 *                  by SAMPLE_SVP_NNIE_QUANT_BASE to obtain a float score.
 * pstDstRoi      : blob holding SAMPLE_SVP_NNIE_COORDI_NUM HI_S32 coordinates
 *                  per ROI, read in the order xmin, ymin, xmax, ymax.
 * pstClassRoiNum : blob holding the ROI count of each class; its width is the
 *                  class count, which is asserted to be 2.
 * resBuf         : output array, zeroed before being filled.
 * resSize        : capacity of resBuf in elements, asserted to be > 0.
 * resLen         : out parameter; receives the number of entries written.
 *
 * Class 0's ROIs are skipped (only used to compute the offset). For each
 * remaining class the ROIs are walked in order and the walk stops at the first
 * score below THRESH_MIN or when resBuf is full. Boxes whose min coordinate is
 * not strictly below the max coordinate are logged and discarded.
 */
static void Yolo2FetchRes(SVP_BLOB_S *pstDstScore, SVP_BLOB_S *pstDstRoi, SVP_BLOB_S *pstClassRoiNum,
    DetectObjInfo resBuf[], int resSize, int* resLen)
{
    HI_U32 i;
    HI_U32 j;
    HI_U32 u32RoiNumBias = 0;
    HI_U32 u32ScoreBias;
    HI_U32 u32BboxBias;
    HI_FLOAT f32Score;
    HI_S32* ps32Score = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstDstScore->u64VirAddr);
    HI_S32* ps32Roi = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstDstRoi->u64VirAddr);
    HI_S32* ps32ClassRoiNum = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstClassRoiNum->u64VirAddr);
    HI_U32 u32ClassNum = pstClassRoiNum->unShape.stWhc.u32Width;

    HI_ASSERT(u32ClassNum == 2); // 2: the number of class
    HI_ASSERT(resSize > 0);
    int resId = 0;
    *resLen = 0;
    memset_s(resBuf, resSize * sizeof(resBuf[0]), 0x00, resSize * sizeof(resBuf[0]));

    /* skip the ROIs that belong to class 0 */
    u32RoiNumBias += ps32ClassRoiNum[0];
    for (i = 1; i < u32ClassNum; i++) {
        u32ScoreBias = u32RoiNumBias;
        u32BboxBias = u32RoiNumBias * SAMPLE_SVP_NNIE_COORDI_NUM;
        /*
         * The former empty "if" that peeked at ps32Score[u32ScoreBias] was
         * removed: it had no effect and read the score slot even when this
         * class had no ROI at all.
         */
        for (j = 0; j < (HI_U32)ps32ClassRoiNum[i]; j++) {
            f32Score = (HI_FLOAT)ps32Score[u32ScoreBias + j] / SAMPLE_SVP_NNIE_QUANT_BASE;
            if (f32Score < THRESH_MIN) {
                SAMPLE_PRT("f32Score:%.2f\n", f32Score);
                break;
            }
            if (resId >= resSize) {
                SAMPLE_PRT("yolo2 resBuf full\n");
                break;
            }
            resBuf[resId].cls = 1; // class 1
            resBuf[resId].score = f32Score;

            RectBox *box = &resBuf[resId].box;
            box->xmin = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM];
            box->ymin = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + ARRAY_SUBSCRIPT_OFFSET_1];
            box->xmax = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + ARRAY_SUBSCRIPT_OFFSET_2];
            box->ymax = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + ARRAY_SUBSCRIPT_OFFSET_3];
            if (box->xmin >= box->xmax || box->ymin >= box->ymax) {
                /* degenerate box: the slot is reused by the next candidate */
                SAMPLE_PRT("yolo1_orig: {%d, %d, %d, %d}, %f, discard for coord ERR\n",
                    box->xmin, box->ymin, box->xmax, box->ymax, f32Score);
            } else {
                resId++;
            }
        }
        u32RoiNumBias += ps32ClassRoiNum[i];
    }

    *resLen = resId;
}

/*
 * function : run Yolov2 inference on one image
 *
 * self    : model configuration; its pszPic field is reset to NULL here.
 * img     : input image handed to FillNnieByImg.
 * resBuf  : output array for the detections.
 * resSize : capacity of resBuf in elements.
 * resLen  : out parameter; receives the number of detections written.
 *
 * Returns 0 on success and -1 if filling the input, the NNIE forward pass or
 * the Yolov2 post-processing fails. On failure resBuf and resLen are left
 * untouched by this function.
 */
int Yolo2CalImg(SAMPLE_SVP_NNIE_CFG_S* self,
    const IVE_IMAGE_S *img, DetectObjInfo resBuf[], int resSize, int* resLen)
{
    SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S stInputDataIdx = {0};
    SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S stProcSegIdx = {0};
    HI_S32 s32Ret;

    // Fill src data
    self->pszPic = NULL;
    stInputDataIdx.u32SegIdx = 0;
    stInputDataIdx.u32NodeIdx = 0;

    // presumably the two zeros select segment 0 / node 0 -- TODO confirm against FillNnieByImg
    s32Ret = FillNnieByImg(self, &g_stYolov2NnieParam, 0, 0, img);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, YOLOV2_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_FillSrcData failed!\n");

    // NNIE process(process the 0-th segment)
    stProcSegIdx.u32SegIdx = 0;
    s32Ret = SAMPLE_SVP_NNIE_Forward(&g_stYolov2NnieParam, &stInputDataIdx, &stProcSegIdx, HI_TRUE);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, YOLOV2_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_Forward failed!\n");

    /* Software process */
    /* if user has changed net struct, please make sure SAMPLE_SVP_NNIE_Yolov2_GetResult
     function input datas are correct */
    s32Ret = SAMPLE_SVP_NNIE_Yolov2_GetResult(&g_stYolov2NnieParam, &g_stYolov2SoftwareParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, YOLOV2_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_Yolov2_GetResult failed!\n");

    // copy the post-processed scores / ROIs from the global software param into resBuf
    Yolo2FetchRes(&g_stYolov2SoftwareParam.stDstScore,
        &g_stYolov2SoftwareParam.stDstRoi, &g_stYolov2SoftwareParam.stClassRoiNum, resBuf, resSize, resLen);
    return 0;

    YOLOV2_FAIL_0:
        return -1;
}
/**********************/
/*
 * function : RetinaFace Deinit
 * Releases the NNIE hardware parameters and unloads the model. Either pointer
 * may be NULL, in which case that step is skipped.
 *
 * Both steps are always attempted. Returns HI_SUCCESS when every attempted
 * step succeeded, otherwise the error code of the first step that failed
 * (previously a failing parameter deinit was hidden by a successful unload).
 */
static HI_S32 NNIE_RetinaFace_Deinit(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
    SAMPLE_SVP_NNIE_MODEL_S *pstNnieModel)
{
    HI_S32 s32Ret = HI_SUCCESS;
    HI_S32 s32StepRet;
    /* hardware deinit */
    if (pstNnieParam != NULL) {
        s32StepRet = SAMPLE_COMM_SVP_NNIE_ParamDeinit(pstNnieParam);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SAMPLE_COMM_SVP_NNIE_ParamDeinit failed!\n");
        if (s32StepRet != HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    /* model deinit */
    if (pstNnieModel != NULL) {
        s32StepRet = SAMPLE_COMM_SVP_NNIE_UnloadModel(pstNnieModel);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SAMPLE_COMM_SVP_NNIE_UnloadModel failed!\n");
        /* keep the earliest error so it is not masked by a later one */
        if (s32StepRet != HI_SUCCESS && s32Ret == HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    return s32Ret;
}
/***************************/
/*
 * function : destroy the RetinaFace detector
 * Removes the task buffer of the global RetinaFace NNIE parameters, then
 * deinitializes the NNIE parameters and model and frees the configuration.
 *
 * self : configuration object to free.
 *
 * NOTE(review): the anchor tables allocated in FACE_DETECTOR_PARAM_INIT
 * (anc_gen and its preset_anchors) do not appear to be released here --
 * confirm whether they are freed elsewhere.
 */
void  CnnFaceDetectDestroy(SAMPLE_SVP_NNIE_CFG_S *self)
{
    HI_S32 s32Ret;

    /* Remove TskBuf */
    s32Ret = HI_MPI_SVP_NNIE_RemoveTskBuf(&(g_stRetinaFaceNnieParam.astForwardCtrl[0].stTskBuf));
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, RETINA_FACE_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,HI_MPI_SVP_NNIE_RemoveTskBuf failed!\n");

    /* the label directly follows the check, so the teardown below is reached
       on the success path as well as through the failure jump */
    RETINA_FACE_FAIL_0:
        NNIE_RetinaFace_Deinit(&g_stRetinaFaceNnieParam, &g_stRetinaFaceModel);
        free(self);
}
/***************************/
/******************************************************************************
* function : print the four values held by a crect2f_t
******************************************************************************/
void print_crect2f(crect2f_t* cr)
{
    printf("rect %f %f %f %f\n",
        cr->val[0],
        cr->val[1],
        cr->val[2],
        cr->val[3]);
}

/******************************************************************************
* function : print the final box, score and landmark points of a face anchor
******************************************************************************/
void print_anchor(anchor_t anchor)
{
    /* element count of the landmark array embedded in anchor_t */
    const int ptCount = sizeof(anchor.pts) / sizeof(anchor.pts[0]);
    int k = 0;

    printf("--finalbox %f %f %f %f, score %f\n",
        anchor.finalbox.x1, anchor.finalbox.y1,
        anchor.finalbox.x2, anchor.finalbox.y2,
        anchor.score);

    printf("--landmarks ");
    while (k < ptCount) {
        printf("%f %f, ", anchor.pts[k].x, anchor.pts[k].y);
        ++k;
    }
    printf("\n");
}
/******************************************************************************
* function : ratio initialization
*
* Derives one anchor from a base anchor and an aspect ratio: the base anchor's
* area (w * h) is divided by the ratio, the new width is the square root of
* that value and the new height is width * ratio. The result keeps the base
* anchor's centre and is written to ratio_anchors[0].
*
* anchor        : base anchor, val[] read as {x1, y1, x2, y2}.
* ratios        : the single aspect ratio to apply.
* ratio_anchors : output; ratio_anchors[0] must point to writable storage.
*
* If ratio_anchors or ratio_anchors[0] is NULL the error is printed and the
* function returns without writing anything.
******************************************************************************/
void ratio_enum(crect2f_t anchor, const float ratios, crect2f_t** ratio_anchors) {
    if (ratio_anchors == NULL || ratio_anchors[0] == NULL) {
        printf("ratio_anchors mem malloc error!");
        /* nothing to write into; continuing would dereference NULL */
        return;
    }
    float w = anchor.val[2] - anchor.val[0] + 1;
    float h = anchor.val[3] - anchor.val[1] + 1;
    float x_ctr = anchor.val[0] + 0.5f * (w - 1);
    float y_ctr = anchor.val[1] + 0.5f * (h - 1);

    float sz = w * h;
    float r = ratios;
    float size_ratios = sz / r;
    float ws = sqrt(size_ratios);
    float hs = ws * r;
    ratio_anchors[0]->val[0] = x_ctr - 0.5f * (ws - 1);
    ratio_anchors[0]->val[1] = y_ctr - 0.5f * (hs - 1);
    ratio_anchors[0]->val[2] = x_ctr + 0.5f * (ws - 1);
    ratio_anchors[0]->val[3] = y_ctr + 0.5f * (hs - 1);
}

/******************************************************************************
* function : scale initialization
*
* For each of the RATIOS_SIZE ratio anchors, produces SCALES_SIZE anchors by
* multiplying the ratio anchor's width and height by each scale while keeping
* its centre.
*
* ratio_anchor  : RATIOS_SIZE input anchors, val[] read as {x1, y1, x2, y2}.
* scales        : SCALES_SIZE scale factors.
* scale_anchors : output array of RATIOS_SIZE * SCALES_SIZE anchor pointers;
*                 the anchor for ratio a and scale s is stored at index
*                 a * SCALES_SIZE + s.
*
* If any of the pointers is NULL the error is printed and nothing is written.
******************************************************************************/
void scale_enum(crect2f_t** ratio_anchor, const float* scales, crect2f_t** scale_anchors) {
    float w, h, x_ctr, y_ctr, ws, hs;
    if (scale_anchors == NULL || ratio_anchor == NULL || scales == NULL)
    {
        printf("scale_anchors mem malloc error!");
        return;
    }
    for (int a = 0; a < RATIOS_SIZE; ++a) {
        w = ratio_anchor[a]->val[2] - ratio_anchor[a]->val[0] + 1;
        h = ratio_anchor[a]->val[3] - ratio_anchor[a]->val[1] + 1;
        x_ctr = ratio_anchor[a]->val[0] + 0.5f * (w - 1);
        y_ctr = ratio_anchor[a]->val[1] + 0.5f * (h - 1);

        for (int s = 0; s < SCALES_SIZE; ++s) {
            /* each ratio owns a row of SCALES_SIZE entries; striding by
               RATIOS_SIZE made rows overlap whenever RATIOS_SIZE != SCALES_SIZE */
            crect2f_t *out = scale_anchors[a * SCALES_SIZE + s];
            ws = w * scales[s];
            hs = h * scales[s];
            out->val[0] = x_ctr - 0.5f * (ws - 1);
            out->val[1] = y_ctr - 0.5f * (hs - 1);
            out->val[2] = x_ctr + 0.5f * (ws - 1);
            out->val[3] = y_ctr + 0.5f * (hs - 1);
        }
    }
}
/******************************************************************************
* function : compute the real bbox coordinates from an anchor and the
*            predicted deltas
*
* anchor : anchor rectangle, val[] read as {x1, y1, x2, y2}.
* delta  : predicted offsets, val[] read as {dx, dy, dw, dh}.
* box    : output; receives the decoded corners x1, y1, x2, y2.
******************************************************************************/
void bbox_proc(const crect2f_t anchor, const crect2f_t delta, rect_t* box)
{
    /* anchor extent and centre */
    float anchor_w = anchor.val[2] - anchor.val[0] + 1;
    float anchor_h = anchor.val[3] - anchor.val[1] + 1;
    float anchor_cx = anchor.val[0] + 0.5 * (anchor_w - 1);
    float anchor_cy = anchor.val[1] + 0.5 * (anchor_h - 1);

    float off_x = delta.val[0];
    float off_y = delta.val[1];
    float log_w = delta.val[2];
    float log_h = delta.val[3];

    /* the centre is shifted proportionally to the anchor size, the size is
       scaled exponentially */
    float ctr_x = off_x * anchor_w + anchor_cx;
    float ctr_y = off_y * anchor_h + anchor_cy;
    float out_w = exp(log_w) * anchor_w;
    float out_h = exp(log_h) * anchor_h;

    double half_w = 0.5 * (out_w - 1.0);
    double half_h = 0.5 * (out_h - 1.0);

    box->x1 = ctr_x - half_w;
    box->y1 = ctr_y - half_h;
    box->x2 = ctr_x + half_w;
    box->y2 = ctr_y + half_h;
}

/******************************************************************************
* function : compute the real landmark coordinates from an anchor and the
*            predicted deltas
*
* anchor    : anchor rectangle, val[] read as {x1, y1, x2, y2}.
* delta     : LANDMARKS predicted point offsets.
* proposals : output; its pts[0 .. LANDMARKS-1] entries are overwritten.
******************************************************************************/
void landmark_proc(const crect2f_t anchor, const point_t* delta, anchor_t* proposals)
{
    float anchor_w = anchor.val[2] - anchor.val[0] + 1;
    float anchor_h = anchor.val[3] - anchor.val[1] + 1;
    float anchor_cx = anchor.val[0] + 0.5 * (anchor_w - 1);
    float anchor_cy = anchor.val[1] + 0.5 * (anchor_h - 1);
    int k;

    /* every offset is scaled by the anchor size and shifted to its centre */
    for (k = 0; k < LANDMARKS; k++) {
        const point_t *src = &delta[k];

        proposals->pts[k].x = src->x * anchor_w + anchor_cx;
        proposals->pts[k].y = src->y * anchor_h + anchor_cy;
    }
}
/******************************************************************************
* function : qsort排序依据函数
******************************************************************************/
int cmp(const void *a, const void *b)
{
    float c = (*(anchor_t*)a).score;
    float d = (*(anchor_t*)b).score;
    return c <= d ? 1 : -1;
}

/******************************************************************************
* function : NMS (non-maximum suppression) for face detection
*
* boxes     : candidate anchors; sorted in place by descending score.
* total     : number of candidates in boxes.
* threshold : a candidate is dropped when its overlap ratio with an already
*             selected box is greater than this value.
* results   : list that receives one node per kept box; each node's val points
*             into boxes, so boxes must outlive the list's use.
*
* Candidates are referred to through the global IndexBuffer, whose entry i is
* expected to hold the value i (set up in FACE_DETECTOR_PARAM_INIT).
* NOTE(review): total is not checked against the size of IndexBuffer here --
* confirm that callers cannot exceed it.
******************************************************************************/
void mnet_nms(anchor_t* boxes, int total, float threshold, list_t* results) {

    int size = total;
    if (size <= 0 || boxes == NULL)
        return;
    list_t* idx = list_new();
    /* created per round inside the loop; allocating one up front leaked it */
    list_t* tmp = NULL;
    int tmp_i;
    int i = 0;
    qsort(boxes, size, sizeof(boxes[0]), cmp);
    for (i = 0; i < size; i++)
    {
        if(IsDebugLog)
            printf("sort:%f", boxes[i].score);
        list_rpush(idx, list_node_new(&IndexBuffer[i]));
        if(IsDebugLog)
            printf("idx: %d", *(int*)idx->tail->val);
    }
    if(IsDebugLog)
        printf(" size : %d", idx->len);
    while (idx->len > 0)
    {
        /* the head of the remaining candidates has the highest score: keep it */
        int good_idx = *(int*)list_at(idx, 0)->val;
        list_rpush(results, list_node_new(&boxes[good_idx]));
        /* snapshot the remaining candidates, then rebuild idx from survivors */
        tmp = list_new();
        for (i = 0; i < idx->len; i++)
        {
            int a = *(int*)(list_at(idx, i)->val);
            list_rpush(tmp, list_node_new(&IndexBuffer[a]));
        }
        if(IsDebugLog)
        {
            for (i = 0; i < idx->len; i++)
            {
                printf(" tmp : %d", *(int*)list_at(idx, i)->val);
            }
        }
        list_clear(idx);

        /* entry 0 of tmp is the box just kept, so start from 1 */
        for (i = 1; i < tmp->len; i++)
        {
            tmp_i = *(int*)list_at(tmp, i)->val;
            if(IsDebugLog)
            {
                printf("\ntmp_i : %d good_i: %d\n", tmp_i, good_idx);
                printf("x : %f y: %f w:%f h%f\n", boxes[good_idx].finalbox.x1, boxes[good_idx].finalbox.y1, boxes[good_idx].finalbox.x2, boxes[good_idx].finalbox.y2);
            }
            float inter_x1 = MAX(boxes[good_idx].finalbox.x1, boxes[tmp_i].finalbox.x1);
            float inter_y1 = MAX(boxes[good_idx].finalbox.y1, boxes[tmp_i].finalbox.y1);
            float inter_x2 = MIN(boxes[good_idx].finalbox.x2, boxes[tmp_i].finalbox.x2);
            float inter_y2 = MIN(boxes[good_idx].finalbox.y2, boxes[tmp_i].finalbox.y2);

            float w = MAX((inter_x2 - inter_x1 + 1), 0.0F);
            float h = MAX((inter_y2 - inter_y1 + 1), 0.0F);

            float inter_area = w * h;
            float area_1 = (boxes[good_idx].finalbox.y2 - boxes[good_idx].finalbox.y1) * (boxes[good_idx].finalbox.x2 - boxes[good_idx].finalbox.x1);
            float area_2 = (boxes[tmp_i].finalbox.y2 - boxes[tmp_i].finalbox.y1) * (boxes[tmp_i].finalbox.x2 - boxes[tmp_i].finalbox.x1);
            float o = inter_area / (area_1 + area_2 - inter_area);

            /* low enough overlap: the candidate stays for the next round */
            if (o <= threshold)
                list_rpush(idx, list_node_new(&IndexBuffer[tmp_i]));
        }
        list_destroy(tmp);
        tmp = NULL;
    }
    list_destroy(idx);
}
/******************************************************************************
* function : initialize the preset anchors of one anchor generator
*
* ag           : generator to fill; preset_anchors, anchor_num and
*                anchor_stride are written.
* stride       : stored as ag->anchor_stride.
* cfg          : supplies BASE_SIZE, RATIOS and SCALES for the anchors.
* dense_anchor : not used.
*
* Builds a base anchor {0, 0, BASE_SIZE-1, BASE_SIZE-1}, applies the ratio
* (ratio_enum) and the scales (scale_enum), and stores the resulting
* RATIOS_SIZE * SCALES_SIZE heap-allocated anchors in ag->preset_anchors.
*
* Returns the number of anchors, or -1 if an allocation fails; in that case
* everything allocated here is freed, ag->preset_anchors is NULL and
* ag->anchor_num is 0.
******************************************************************************/
int anchor_init(anchor_generator_t* ag, int stride, const anchor_cfg_t cfg, int dense_anchor) {
    crect2f_t base_anchor;
    crect2f_t** ratio_anchors = NULL;
    crect2f_t** preset = NULL;
    int anchor_num = RATIOS_SIZE * SCALES_SIZE;
    int i = 0;

    (void)dense_anchor;

    base_anchor.val[0] = 0;
    base_anchor.val[1] = 0;
    base_anchor.val[2] = cfg.BASE_SIZE - 1;
    base_anchor.val[3] = cfg.BASE_SIZE - 1;

    (*ag).preset_anchors = NULL;
    (*ag).anchor_num = 0;

    // get ratio anchors (checked before use)
    ratio_anchors = (crect2f_t**)malloc(sizeof(crect2f_t*));
    if (ratio_anchors == NULL) {
        printf("ratio_anchors mem alloc error!");
        return -1;
    }
    ratio_anchors[0] = (crect2f_t*)malloc(sizeof(crect2f_t));
    if (ratio_anchors[0] == NULL) {
        printf("ratio_anchors mem alloc error!");
        free(ratio_anchors);
        return -1;
    }
    ratio_enum(base_anchor, cfg.RATIOS, ratio_anchors);

    // one preset anchor per (ratio, scale) pair
    preset = (crect2f_t**)malloc(sizeof(crect2f_t*) * anchor_num);
    if (preset == NULL) {
        printf("preset_anchors mem alloc error!");
        free(ratio_anchors[0]);
        free(ratio_anchors);
        return -1;
    }
    for (i = 0; i < anchor_num; i++)
    {
        preset[i] = (crect2f_t*)malloc(sizeof(crect2f_t));
        if (preset[i] == NULL) {
            printf("preset_anchors mem alloc error!");
            while (i > 0) {
                free(preset[--i]);
            }
            free(preset);
            free(ratio_anchors[0]);
            free(ratio_anchors);
            return -1;
        }
    }

    scale_enum(ratio_anchors, cfg.SCALES, preset);

    /* free the element first and the array second; the array pointer used to
       be cleared before its free(), which leaked it */
    free(ratio_anchors[0]);
    free(ratio_anchors);
    ratio_anchors = NULL;

    (*ag).preset_anchors = preset;
    (*ag).anchor_num = anchor_num;
    (*ag).anchor_stride = stride;
    return (*ag).anchor_num;
}
/******************************************************************************
* function : filter the predictions by anchor and threshold; this removes most
*            of the candidate boxes
*
* pstNnieParam : NNIE parameters whose segment-0 output blobs are read. For a
*                given stride_index the blob at stride_index + FMC * 0 is used
*                as box regression, + FMC * 1 as landmarks and + FMC * 2 as
*                classification scores.
* stride_index : index of the feature map to process.
* ag           : anchor generator for that feature map; proposals,
*                proposal_size and pts_count are (re)written here.
*
* The function makes two passes over the score channels
* [anchor_num, 2 * anchor_num): the first counts the scores that reach
* cls_threshold so the proposal array can be sized, the second decodes a
* proposal (anchor, final box, score, grid position, landmarks) for each such
* score. Raw network values are divided by 4096.f when they are decoded.
* The proposals allocated here are owned by ag; this function does not free
* them. Always returns 0.
*
* NOTE(review): none of the malloc results are checked.
* NOTE(review): pts_length is derived from the landmark blob's channel count,
* while pts_delta holds LANDMARKS entries -- confirm pts_length can never
* exceed LANDMARKS.
******************************************************************************/
int filter_anchor(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, int stride_index, anchor_generator_t* ag) {

    HI_S32 reg_w = pstNnieParam->astSegData[0].astDst[stride_index + FMC * 0].unShape.stWhc.u32Width;
    HI_S32 reg_h = pstNnieParam->astSegData[0].astDst[stride_index + FMC * 0].unShape.stWhc.u32Height;
    HI_S32 reg_c = pstNnieParam->astSegData[0].astDst[stride_index + FMC * 0].unShape.stWhc.u32Chn;

    HI_S32 pts_w = pstNnieParam->astSegData[0].astDst[stride_index + FMC * 1].unShape.stWhc.u32Width;
    HI_S32 pts_h = pstNnieParam->astSegData[0].astDst[stride_index + FMC * 1].unShape.stWhc.u32Height;
    HI_S32 pts_c = pstNnieParam->astSegData[0].astDst[stride_index + FMC * 1].unShape.stWhc.u32Chn;

    HI_S32 cls_w = pstNnieParam->astSegData[0].astDst[stride_index + FMC * 2].unShape.stWhc.u32Width;
    HI_S32 cls_h = pstNnieParam->astSegData[0].astDst[stride_index + FMC * 2].unShape.stWhc.u32Height;
    HI_S32 cls_c = pstNnieParam->astSegData[0].astDst[stride_index + FMC * 2].unShape.stWhc.u32Chn;

    /* raw output data of the three blobs, read as HI_S32 values */
    HI_S32* reg = (HI_S32* )((HI_U8* )pstNnieParam->astSegData[0].astDst[stride_index + FMC * 0].u64VirAddr);
    HI_S32* pts = (HI_S32* )((HI_U8* )pstNnieParam->astSegData[0].astDst[stride_index + FMC * 1].u64VirAddr);
    HI_S32* cls = (HI_S32* )((HI_U8* )pstNnieParam->astSegData[0].astDst[stride_index + FMC * 2].u64VirAddr);
    if(IsDebugLog)
    {
        printf("cls %d %d %d\n", cls_c, cls_h, cls_w);
        printf("reg %d %d %d\n", reg_c, reg_h, reg_w);
        printf("pts %d %d %d\n", pts_c, pts_h, pts_w);
    }


    //assert(cls_c == (*ag).anchor_num * 2);
    //assert(reg_c == (*ag).anchor_num * 4);
    HI_S32 pts_length = 0, proposal_size = 0;
    (*ag).pts_count = 0;
    HI_S32 anchor_num = (*ag).anchor_num;
    //assert(pts_c % (*ag).anchor_num == 0);
    /* landmark points per anchor: each point takes two channels (x and y) */
    pts_length = pts_c / (*ag).anchor_num / 2;

    HI_S32 i = 0, j = 0, a = 0, p = 0, id = 0, c = 0, h = 0, w = 0;
    crect2f_t box, delta;
    point_t pts_delta[LANDMARKS];
    float score_t = 0.f;

    HI_S32 c_pt = 0, h_pt = 0, w_pt = 0;
    if(IsDebugLog)
        printf("anchor_num %d\n", anchor_num);
    /* pass 1: count the scores that reach the threshold */
    for (c = 0; c < anchor_num; ++c)
    {
        /* score of anchor c is read from channel (c + anchor_num) */
        c_pt = (c + anchor_num) * cls_h * cls_w;
        for (h = 0; h < cls_h; ++h)
        {
            h_pt = c_pt + cls_w * h;
            for (w = 0; w < cls_w; ++w)
            {
                score_t = *(cls + (h_pt + w));
                if (score_t >= cls_threshold)
                {
                    proposal_size++;
                }
            }
        }
    }

    // printf("proposal_size %d\n", proposal_size);
    // if(!proposal_size)
    //  return 0;
    (*ag).proposal_size = proposal_size;
    (*ag).proposals = (anchor_t**)malloc(sizeof(anchor_t*) * proposal_size);
    for (i = 0; i < proposal_size; i++)
    {
        (*ag).proposals[i] = (anchor_t*)malloc(sizeof(anchor_t));
    }
    /* pass 2: decode one proposal per score that reaches the threshold */
    for (i = 0; i < cls_h; ++i) {
        for (j = 0; j < cls_w; ++j) {
            /* offset of grid cell (row i, column j) inside one channel plane */
            id = i * cls_w + j;
            for (a = 0; a < anchor_num; ++a)
            {
                score_t = *(cls + cls_w * cls_h * (a + anchor_num) + id);
                if (score_t >= cls_threshold) {
                    
                    // printf("cls %f ", score_t);

                    /* preset anchor a shifted to this grid cell */
                    box.val[0] = j * (*ag).anchor_stride + (*ag).preset_anchors[a]->val[0];
                    box.val[1] = i * (*ag).anchor_stride + (*ag).preset_anchors[a]->val[1];
                    box.val[2] = j * (*ag).anchor_stride + (*ag).preset_anchors[a]->val[2];
                    box.val[3] = i * (*ag).anchor_stride + (*ag).preset_anchors[a]->val[3];
                    if(IsDebugLog)
                        printf("box : %f %f %f %f\n", box.val[0], box.val[1], box.val[2], box.val[3]);

                    /* four regression channels per anchor */
                    delta.val[0] = *(reg + cls_w*cls_h*(a * 4 + 0) + id) / 4096.f;
                    delta.val[1] = *(reg + cls_w*cls_h*(a * 4 + 1) + id) / 4096.f;
                    delta.val[2] = *(reg + cls_w*cls_h*(a * 4 + 2) + id) / 4096.f;
                    delta.val[3] = *(reg + cls_w*cls_h*(a * 4 + 3) + id) / 4096.f;
                    if(IsDebugLog)
                        printf("delta : %f %f %f %f\n", delta.val[0], delta.val[1], delta.val[2], delta.val[3]);
            
                    (*ag).proposals[(*ag).pts_count]->anchor[0] = box.val[0];
                    (*ag).proposals[(*ag).pts_count]->anchor[1] = box.val[1];
                    (*ag).proposals[(*ag).pts_count]->anchor[2] = box.val[2];
                    (*ag).proposals[(*ag).pts_count]->anchor[3] = box.val[3];

                    bbox_proc(box, delta, &(*ag).proposals[(*ag).pts_count]->finalbox);
                    if(IsDebugLog)
                        printf("bbox pred\n");
                    (*ag).proposals[(*ag).pts_count]->score = score_t / 4096.f;
                    // printf("score: %f", (*ag).proposals[(*ag).pts_count]->score);
                    (*ag).proposals[(*ag).pts_count]->center_x = j;
                    (*ag).proposals[(*ag).pts_count]->center_y = i;
                    if(IsDebugLog)
                        printf("center %d %d\n", i, j);
                    /* x and y of landmark p sit in two consecutive channels */
                    for (p = 0; p < pts_length; ++p) {
                        pts_delta[p].x = *(pts + cls_w*cls_h*(a * pts_length * 2 + p * 2) + id) / 4096.f;
                        pts_delta[p].y = *(pts + cls_w*cls_h*(a * pts_length * 2 + p * 2 + 1) + id) / 4096.f; 
                    }
                    if(IsDebugLog)
                        printf("ready landmark_pred\n");
                    landmark_proc(box, pts_delta, &(*(*ag).proposals[(*ag).pts_count]));
                    if(IsDebugLog)
                        printf("landmark_pred\n");
    
                    (*ag).pts_count++;
                }
            }
        }
    }
    return 0;
}
/*
 * Software process
 * Turns the raw RetinaFace outputs into face detections.
 *
 * pstNnieParam : NNIE parameters whose output blobs are decoded.
 * resBuf       : output array; zeroed and then filled with one entry per face
 *                (five landmark points and an integer bounding box).
 * resSize      : capacity of resBuf in elements.
 * resLen       : out parameter; receives the number of entries written to
 *                resBuf, which is never more than resSize.
 *
 * Steps: filter_anchor collects the proposals of each of the FMC feature maps,
 * the proposals are copied into one array, mnet_nms selects the final boxes,
 * and the results are written to resBuf and to the global as32ResultDet
 * (15 slots per detection). u32ResultDetCnt is set to the number of boxes kept
 * by NMS. The per-feature-map proposals allocated by filter_anchor are freed
 * before returning.
 *
 * Returns HI_SUCCESS, or HI_FAILURE if the proposal array cannot be allocated.
 * Writes to resBuf and as32ResultDet are bounded by their capacities; extra
 * detections are dropped.
 */
static HI_S32 SVP_NNIE_MNET(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,DetectFaceInfo resBuf[],int resSize,int* resLen)
{
    HI_S32 s32Ret = HI_SUCCESS;
    const int detCapacity = (int)(sizeof(as32ResultDet) / sizeof(as32ResultDet[0]));
    int faceCnt = 0;
    anchor_t* proposals = NULL;
    list_t * proposals_l_tmp = list_new();
    list_t * results = list_new();

    *resLen = 0;

    /* gather the proposals of every feature map into one list */
    for (int i = 0; i < FMC; ++i) {
        filter_anchor(pstNnieParam, i, &anc_gen[i]);
        for (int r = 0; r < anc_gen[i].pts_count; ++r) {
            list_rpush(proposals_l_tmp, list_node_new(anc_gen[i].proposals[r]));
        }
    }

    if(IsDebugLog)
    {
        for (int r = 0; r < proposals_l_tmp->len; ++r) {
            print_anchor(*(anchor_t*)list_at(proposals_l_tmp, r)->val);
        }
    }

    /* flatten the list into an array that mnet_nms can sort */
    if (proposals_l_tmp->len > 0)
    {
        proposals = (anchor_t*)malloc(sizeof(anchor_t)*proposals_l_tmp->len);
        if (proposals == NULL)
        {
            printf("proposals mem malloc error!\n");
            s32Ret = HI_FAILURE;
            goto MNET_CLEANUP;
        }
        for (int i = 0; i < proposals_l_tmp->len; ++i)
        {
            proposals[i] = *(anchor_t *)list_at(proposals_l_tmp, i)->val;
        }
    }
    mnet_nms(proposals, proposals_l_tmp->len, nms_threshold, results);
    memset(as32ResultDet, 0, sizeof(as32ResultDet));
    u32ResultDetCnt = results->len;
    if (resSize > 0)
    {
        memset_s(resBuf, resSize * sizeof(resBuf[0]), 0x00, resSize * sizeof(resBuf[0]));
    }

    for(int i = 0; i < results->len; i ++)
    {
        anchor_t res = *(anchor_t *)list_at(results, i)->val;

        /* only touch the global table while the detection's slots fit in it */
        if (i * 15 + 4 < detCapacity)
        {
            /* NOTE(review): slots 0 and 1 both receive x1 -- confirm what
               slot 0 is meant to hold */
            as32ResultDet[i*15 + 0] = res.finalbox.x1;
            as32ResultDet[i*15 + 1] = res.finalbox.x1;
            as32ResultDet[i*15 + 2] = res.finalbox.y1;
            as32ResultDet[i*15 + 3] = res.finalbox.x2;
            as32ResultDet[i*15 + 4] = res.finalbox.y2;
        }

        /* never write past the caller's buffer */
        if (i >= resSize)
        {
            continue;
        }

        resBuf[i].pts.Eye_L.x     =   res.pts[0].x;
        resBuf[i].pts.Eye_L.y     =   res.pts[0].y;
        resBuf[i].pts.Eye_R.x     =   res.pts[1].x;
        resBuf[i].pts.Eye_R.y     =   res.pts[1].y;
        resBuf[i].pts.Nose.x      =   res.pts[2].x;
        resBuf[i].pts.Nose.y      =   res.pts[2].y;
        resBuf[i].pts.Mouth_L.x   =   res.pts[3].x;
        resBuf[i].pts.Mouth_L.y   =   res.pts[3].y;
        resBuf[i].pts.Mouth_R.x   =   res.pts[4].x;
        resBuf[i].pts.Mouth_R.y   =   res.pts[4].y;

        resBuf[i].box.xmin =  (int)res.finalbox.x1;
        resBuf[i].box.ymin =  (int)res.finalbox.y1;
        resBuf[i].box.xmax =  (int)res.finalbox.x2;
        resBuf[i].box.ymax =  (int)res.finalbox.y2;
        faceCnt++;
    }
    *resLen = faceCnt;   /* number of faces written to resBuf */

MNET_CLEANUP:
    /* release the proposals that filter_anchor allocated for each feature map */
    for (int n = 0; n < FMC; n++)
    {
        if (anc_gen[n].proposals != NULL)
        {
            for (int i = 0; i < anc_gen[n].proposal_size; i++)
            {
                free(anc_gen[n].proposals[i]);
                anc_gen[n].proposals[i] = NULL;
            }
            free(anc_gen[n].proposals);
            anc_gen[n].proposals = NULL;
        }
        anc_gen[n].proposal_size = 0;
    }
    list_destroy(results);
    list_destroy(proposals_l_tmp);
    free(proposals);
    proposals = NULL;

    return s32Ret;
}
/*
 * function : run RetinaFace detection on one image
 *
 * self   : model configuration; its pszPic field is reset to NULL here.
 * img    : input image handed to FillNnieByImg.
 * resBuf : output array for the detected faces.
 * resize : capacity of resBuf in elements (passed on as the buffer size).
 * resLen : out parameter; receives the number of detected faces.
 *
 * Returns 0 after the software post-processing has run, and -1 if filling the
 * input or the NNIE forward pass fails. The result of SVP_NNIE_MNET is not
 * checked.
 */
int RetinaFaceCalImg(SAMPLE_SVP_NNIE_CFG_S* self,const IVE_IMAGE_S *img,DetectFaceInfo resBuf[],int resize,int* resLen)
{
    SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S stInputDataIdx = {0};
    SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S stProcSegIdx = {0};
    HI_S32 s32Ret;

    // Fill src data
    self->pszPic = NULL;
    stInputDataIdx.u32SegIdx = 0;
    stInputDataIdx.u32NodeIdx = 0;
	
    s32Ret = FillNnieByImg(self, &g_stRetinaFaceNnieParam, 0, 0, img);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, RETINAFACE_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_FillSrcData failed!\n");
    
    // NNIE process(process the 0-th segment)
    stProcSegIdx.u32SegIdx = 0;
    s32Ret = SAMPLE_SVP_NNIE_Forward(&g_stRetinaFaceNnieParam, &stInputDataIdx, &stProcSegIdx, HI_TRUE);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, RETINAFACE_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_Forward failed!\n");

    /* Software process */
	SVP_NNIE_MNET(&g_stRetinaFaceNnieParam,resBuf,resize,resLen);
	
    return 0;
  
    RETINAFACE_FAIL_0:
        return -1;
}

/******************************************************************************
* function : NNIE parameter initialization for RetinaFace
*
* pstCfg       : NNIE configuration passed to the hardware parameter init.
* pstNnieParam : NNIE parameter block to initialize.
*
* Returns the result of SAMPLE_COMM_SVP_NNIE_ParamInit when it succeeds. On
* failure the parameters are deinitialized through NNIE_RetinaFace_Deinit (the
* model pointer is passed as NULL, so the model is not unloaded here) and a
* failure code is returned.
******************************************************************************/
static HI_S32 SampleSvpNnieRetinaFaceParamInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
{
    HI_S32 s32Ret;
    /* init hardware para */
    s32Ret = SAMPLE_COMM_SVP_NNIE_ParamInit(pstCfg, pstNnieParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, INIT_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_COMM_SVP_NNIE_ParamInit failed!\n", s32Ret);
    
    return s32Ret;

INIT_FAIL_0:
    /* roll back the partially initialized parameters */
    s32Ret = NNIE_RetinaFace_Deinit(pstNnieParam, NULL);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_SVP_NNIE_RetinaFace_Deinit failed!\n", s32Ret);
    return HI_FAILURE;
}
/******************************************************************************
* function : model parameter initialization for the face detector
*
* threshold : score threshold; stored in cls_threshold after being multiplied
*             by QUANT_BASE.
* isLog     : stored in IsDebugLog; non-zero enables the debug printf output.
*
* Clears the global result table and counter, fills IndexBuffer[0..511] with
* the values 0..511, allocates the global anc_gen array with FMC entries and
* initializes each entry from anchor_base[i] through anchor_init.
* If the anc_gen allocation fails, the error is printed and the function
* returns with anc_gen set to NULL.
*
* NOTE(review): calling this more than once allocates anc_gen again without
* releasing the previous tables -- confirm it is only called once per model.
******************************************************************************/
void FACE_DETECTOR_PARAM_INIT(float threshold, int isLog)
{
    memset(as32ResultDet, 0, sizeof(as32ResultDet));
    u32ResultDetCnt = 0;
    IsDebugLog = isLog;
    cls_threshold = threshold * QUANT_BASE;
    for (int i = 0; i < 512; i++)
    {
        IndexBuffer[i] = i;
    }
    anc_gen = (anchor_generator_t*)malloc(sizeof(anchor_generator_t) * FMC);
    if (anc_gen == NULL)
    {
        /* anchor_init below would write through the NULL pointer */
        printf("anc_gen mem malloc error!\n");
        return;
    }

    for (HI_S32 i = 0; i < FMC; i++)
    {
        /* anchor_base[i] carries the stride of feature map i together with its
           ratio and scale factors; one generator is initialized for each of
           the FMC output feature maps */
        anchor_init(&anc_gen[i], anchor_base[i].stride, anchor_base[i], 0);
        if (IsDebugLog)
        {
            printf("\n anchor base : %d , %f , %f , %f \n", anchor_base[i].stride, anchor_base[i].SCALES[0], anchor_base[i].SCALES[1], anchor_base[i].RATIOS);
            for (int j = 0; j < anc_gen[i].anchor_num; ++j)
                print_crect2f(anc_gen[i].preset_anchors[j]);
        }
    }
}
/******************************************************************************
* function : create the RetinaFace model based on a model file
*
* model     : out parameter; receives the newly allocated configuration on
*             success and NULL on failure.
* modelFile : path of the model file handed to SAMPLE_COMM_SVP_NNIE_LoadModel.
*
* Loads the model into the global g_stRetinaFaceModel, initializes the global
* RetinaFace NNIE parameters, sets up the detector's software parameters
* (FACE_DETECTOR_PARAM_INIT) and registers the forward task buffer.
* Returns 0 on success and -1 on failure. On failure the global RetinaFace
* state is deinitialized and the configuration object allocated here is freed.
******************************************************************************/
int RetinaFaceCreate(SAMPLE_SVP_NNIE_CFG_S **model, const char* modelFile)
{
    SAMPLE_SVP_NNIE_CFG_S *self;
    HI_U32 u32PicNum = 1;
    HI_S32 s32Ret;

    self = (SAMPLE_SVP_NNIE_CFG_S*)malloc(sizeof(*self));
    HI_ASSERT(self);
    if (memset_s(self, sizeof(*self), 0x00, sizeof(*self)) != EOK) {
        HI_ASSERT(0);
    }

    // Set configuration parameter
    self->pszPic = NULL;
    self->u32MaxInputNum = u32PicNum; // max input image num in each batch
    self->u32MaxRoiNum = 0;
    self->aenNnieCoreId[0] = SVP_NNIE_ID_0; // set NNIE core

    // Sys init
    //SAMPLE_COMM_SVP_CheckSysInit();

    // CNN Load model
    SAMPLE_SVP_TRACE_INFO("RetinaFace Load model!\n");
    s32Ret = SAMPLE_COMM_SVP_NNIE_LoadModel((char*)modelFile, &g_stRetinaFaceModel);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, RETINA_FACE_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_COMM_SVP_NNIE_LoadModel failed!\n");

    // CNN parameter initialization
    SAMPLE_SVP_TRACE_INFO("RetinaFace parameter initialization!\n");
    g_stRetinaFaceNnieParam.pstModel = &g_stRetinaFaceModel.stModel;

    s32Ret = SampleSvpNnieRetinaFaceParamInit(self, &g_stRetinaFaceNnieParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, RETINA_FACE_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_Cnn_ParamInit failed!\n");

    // software parameter init (threshold and isLog are defined outside this function)
    FACE_DETECTOR_PARAM_INIT(threshold, isLog);

    // record tskBuf
    s32Ret = HI_MPI_SVP_NNIE_AddTskBuf(&(g_stRetinaFaceNnieParam.astForwardCtrl[0].stTskBuf));
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, RETINA_FACE_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,HI_MPI_SVP_NNIE_AddTskBuf failed!\n");
    *model = self;
    return 0;

    RETINA_FACE_FAIL_0:
        NNIE_RetinaFace_Deinit(&g_stRetinaFaceNnieParam, &g_stRetinaFaceModel);
        /* the caller only ever sees NULL on failure, so release the config here */
        free(self);
        *model = NULL;
        return -1;
}


/*
 * function : MobileFace Deinit
 * Releases the NNIE hardware parameters and unloads the model. Either pointer
 * may be NULL, in which case that step is skipped.
 *
 * Both steps are always attempted. Returns HI_SUCCESS when every attempted
 * step succeeded, otherwise the error code of the first step that failed
 * (previously a failing parameter deinit was hidden by a successful unload).
 */
static HI_S32 NNIE_MobileFace_Deinit(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
    SAMPLE_SVP_NNIE_MODEL_S *pstNnieModel)
{
    HI_S32 s32Ret = HI_SUCCESS;
    HI_S32 s32StepRet;
    /* hardware deinit */
    if (pstNnieParam != NULL) {
        s32StepRet = SAMPLE_COMM_SVP_NNIE_ParamDeinit(pstNnieParam);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SAMPLE_COMM_SVP_NNIE_ParamDeinit failed!\n");
        if (s32StepRet != HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    /* model deinit */
    if (pstNnieModel != NULL) {
        s32StepRet = SAMPLE_COMM_SVP_NNIE_UnloadModel(pstNnieModel);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SAMPLE_COMM_SVP_NNIE_UnloadModel failed!\n");
        /* keep the earliest error so it is not masked by a later one */
        if (s32StepRet != HI_SUCCESS && s32Ret == HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    return s32Ret;
}

/*
 * Tear down the MobileFace recognizer: remove the forward task buffer,
 * deinitialize the NNIE parameters and model, then free the configuration
 * block. A failed task-buffer removal is reported through the check macro,
 * and the cleanup below runs in either case.
 */
void CnnFaceRecDestroy(SAMPLE_SVP_NNIE_CFG_S *self)
{
    HI_S32 s32Status = HI_MPI_SVP_NNIE_RemoveTskBuf(&(g_stMobileFaceNnieParam.astForwardCtrl[0].stTskBuf));

    /* The label sits directly below, so success and failure share the same cleanup. */
    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Status != HI_SUCCESS, MOBILE_FACE_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,HI_MPI_SVP_NNIE_RemoveTskBuf failed!\n");

MOBILE_FACE_FAIL_0:
    NNIE_MobileFace_Deinit(&g_stMobileFaceNnieParam, &g_stMobileFaceModel);
    free(self);
}

/*
 * function : run MobileFace on one image and copy out its feature vector
 *
 * self:         configuration object (presumably the one produced by
 *               MobileFaceCreate).
 * img:          input image, forwarded to FillNnieByImg.
 * feature_buff: receives 128 floats: the first output blob of segment 0,
 *               each raw HI_S32 value divided by 4096.
 *
 * Returns 0 on success, -1 if an argument is NULL, filling the input fails,
 * or the NNIE forward pass fails.
 */
int MobileFaceCalImg(SAMPLE_SVP_NNIE_CFG_S* self,const IVE_IMAGE_S *img,float feature_buff[])
{
    enum { MOBILE_FACE_FEATURE_LEN = 128 }; /* number of values copied to feature_buff */
    /* NOTE(review): 4096 looks like the NNIE fixed-point quantization base - confirm. */
    const float f32QuantBase = 4096.f;
    SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S stInputDataIdx = {0};
    SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S stProcSegIdx = {0};
    HI_S32 s32Ret;
    HI_S32 *ps32Score;
    HI_U32 i;

    SAMPLE_SVP_CHECK_EXPR_GOTO(self == NULL || img == NULL || feature_buff == NULL, MOBILE_FAIL_0,
        SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,MobileFaceCalImg got a NULL argument!\n");

    // Fill src data
    self->pszPic = NULL;
    stInputDataIdx.u32SegIdx = 0;
    stInputDataIdx.u32NodeIdx = 0;

    s32Ret = FillNnieByImg(self, &g_stMobileFaceNnieParam, 0, 0, img);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, MOBILE_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_FillSrcData failed!\n");

    // NNIE process(process the 0-th segment)
    stProcSegIdx.u32SegIdx = 0;
    s32Ret = SAMPLE_SVP_NNIE_Forward(&g_stMobileFaceNnieParam, &stInputDataIdx, &stProcSegIdx, HI_TRUE);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, MOBILE_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_Forward failed!\n");

    /* Software process: convert the raw output values of blob 0 into floats. */
    ps32Score = (HI_S32* )((HI_U8* )g_stMobileFaceNnieParam.astSegData[0].astDst[0].u64VirAddr);
    for (i = 0; i < MOBILE_FACE_FEATURE_LEN; i++) {
        feature_buff[i] = ps32Score[i] / f32QuantBase;
    }

    return 0;

MOBILE_FAIL_0:
    return -1;
}
/******************************************************************************
* function : NNIE parameter initialization (MobileFace)
*
* Runs SAMPLE_COMM_SVP_NNIE_ParamInit for pstCfg / pstNnieParam. When that
* fails, the parameters are deinitialized again (the model is left alone)
* before an error is returned.
*
* Returns HI_SUCCESS on success. On failure returns HI_FAILURE, unless the
* rollback itself fails, in which case SAMPLE_SVP_CHECK_EXPR_RET is handed the
* rollback status as its return value.
******************************************************************************/
static HI_S32 SampleSvpNnieMobileFaceParamInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
{
    /* init hardware para */
    HI_S32 s32Status = SAMPLE_COMM_SVP_NNIE_ParamInit(pstCfg, pstNnieParam);

    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Status != HI_SUCCESS, INIT_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_COMM_SVP_NNIE_ParamInit failed!\n", s32Status);

    /* Reaching this point means the init call reported HI_SUCCESS. */
    return HI_SUCCESS;

INIT_FAIL_0:
    /* Roll back whatever the failed init left behind. */
    s32Status = NNIE_MobileFace_Deinit(pstNnieParam, NULL);
    SAMPLE_SVP_CHECK_EXPR_RET(s32Status != HI_SUCCESS, s32Status, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_SVP_NNIE_MobileFace_Deinit failed!\n", s32Status);
    return HI_FAILURE;
}
/******************************************************************************
* function : create the MobileFace model from a model file
*
* model:     out-parameter; receives the heap-allocated configuration on
*            success and is set to NULL on failure.
* modelFile: path handed to SAMPLE_COMM_SVP_NNIE_LoadModel.
*
* Returns 0 on success and -1 on failure. The model and NNIE parameter state
* are kept in g_stMobileFaceModel / g_stMobileFaceNnieParam; only the
* configuration block is allocated here and handed to the caller.
******************************************************************************/
int MobileFaceCreate(SAMPLE_SVP_NNIE_CFG_S **model, const char* modelFile)
{
    SAMPLE_SVP_NNIE_CFG_S *self;
    HI_U32 u32PicNum = 1;
    HI_S32 s32Ret;

    /* Allocation problems are reported as an ordinary failure instead of relying on an assert. */
    self = (SAMPLE_SVP_NNIE_CFG_S*)malloc(sizeof(*self));
    SAMPLE_SVP_CHECK_EXPR_GOTO(self == NULL, MOBILE_FACE_FAIL_1, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,malloc MobileFace cfg failed!\n");
    SAMPLE_SVP_CHECK_EXPR_GOTO(memset_s(self, sizeof(*self), 0x00, sizeof(*self)) != EOK, MOBILE_FACE_FAIL_1,
        SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,memset_s MobileFace cfg failed!\n");

    // Set configuration parameter
    self->pszPic = NULL;
    self->u32MaxInputNum = u32PicNum; // max input image num in each batch
    self->u32MaxRoiNum = 0;
    self->aenNnieCoreId[0] = SVP_NNIE_ID_0; // set NNIE core

    // CNN Load model
    SAMPLE_SVP_TRACE_INFO("MobileFace Load model!\n");
    s32Ret = SAMPLE_COMM_SVP_NNIE_LoadModel((char*)modelFile, &g_stMobileFaceModel);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, MOBILE_FACE_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_COMM_SVP_NNIE_LoadModel failed!\n");

    // CNN parameter initialization
    SAMPLE_SVP_TRACE_INFO("MobileFace parameter initialization!\n");
    g_stMobileFaceNnieParam.pstModel = &g_stMobileFaceModel.stModel;

    s32Ret = SampleSvpNnieMobileFaceParamInit(self, &g_stMobileFaceNnieParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, MOBILE_FACE_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_Cnn_ParamInit failed!\n");

    // record tskBuf
    s32Ret = HI_MPI_SVP_NNIE_AddTskBuf(&(g_stMobileFaceNnieParam.astForwardCtrl[0].stTskBuf));
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, MOBILE_FACE_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,HI_MPI_SVP_NNIE_AddTskBuf failed!\n");
    *model = self;
    return 0;

MOBILE_FACE_FAIL_0:
    NNIE_MobileFace_Deinit(&g_stMobileFaceNnieParam, &g_stMobileFaceModel);
MOBILE_FACE_FAIL_1:
    /* The configuration block is never handed out on failure, so release it here. */
    free(self);
    *model = NULL;
    return -1;
}
/**********************/
/*
 * function : YoloFastestXL software deinit
 *
 * Frees the buffer recorded in stGetResultTmpBuf and clears the addresses of
 * the ROI, score and per-class ROI count blobs, which
 * SampleSvpNnieYoloFastestSoftwareInit places inside that same allocation.
 * Nothing is done when either recorded address is 0.
 *
 * SAMPLE_SVP_CHECK_EXPR_RET is handed HI_INVALID_VALUE for a NULL argument;
 * otherwise the function returns HI_SUCCESS.
 */
static HI_S32 SampleSvpNnieYoloFastestXLSoftwareDeinit(SAMPLE_SVP_NNIE_YOLOFASTESTXL_SOFTWARE_PARAM_S* pstSoftWareParam)
{
    SAMPLE_SVP_CHECK_EXPR_RET(pstSoftWareParam == NULL, HI_INVALID_VALUE, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, pstSoftWareParam can't be NULL!\n");

    /* No complete physical/virtual address pair recorded: nothing to release. */
    if (pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr == 0 || pstSoftWareParam->stGetResultTmpBuf.u64VirAddr == 0) {
        return HI_SUCCESS;
    }

    SAMPLE_SVP_MMZ_FREE(pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr,
        pstSoftWareParam->stGetResultTmpBuf.u64VirAddr);

    /* Clear every address that referred to the released buffer. */
    pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr = 0;
    pstSoftWareParam->stGetResultTmpBuf.u64VirAddr = 0;
    pstSoftWareParam->stDstRoi.u64PhyAddr = 0;
    pstSoftWareParam->stDstRoi.u64VirAddr = 0;
    pstSoftWareParam->stDstScore.u64PhyAddr = 0;
    pstSoftWareParam->stDstScore.u64VirAddr = 0;
    pstSoftWareParam->stClassRoiNum.u64PhyAddr = 0;
    pstSoftWareParam->stClassRoiNum.u64VirAddr = 0;

    return HI_SUCCESS;
}
/*
 * function : parts detector (YoloFastestXL) deinit
 *
 * Releases, in order, the NNIE hardware parameters, the software
 * (post-processing) parameters and the loaded model. Any argument may be
 * NULL, in which case that step is skipped. All steps are always attempted.
 *
 * Returns HI_SUCCESS when every attempted step succeeded; otherwise the
 * status of the last step that failed (a later success no longer hides an
 * earlier failure).
 */
static HI_S32 NNIE_PartsDetect_Deinit(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
    SAMPLE_SVP_NNIE_YOLOFASTESTXL_SOFTWARE_PARAM_S* pstSoftWareParam, SAMPLE_SVP_NNIE_MODEL_S *pstNnieModel)
{
    HI_S32 s32Ret = HI_SUCCESS;
    HI_S32 s32StepRet;

    /* hardware deinit */
    if (pstNnieParam != NULL) {
        s32StepRet = SAMPLE_COMM_SVP_NNIE_ParamDeinit(pstNnieParam);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SAMPLE_COMM_SVP_NNIE_ParamDeinit failed!\n");
        if (s32StepRet != HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    /* software deinit */
    if (pstSoftWareParam != NULL) {
        s32StepRet = SampleSvpNnieYoloFastestXLSoftwareDeinit(pstSoftWareParam);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SampleSvpNnieYoloFastestXLSoftwareDeinit failed!\n");
        if (s32StepRet != HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    /* model deinit */
    if (pstNnieModel != NULL) {
        s32StepRet = SAMPLE_COMM_SVP_NNIE_UnloadModel(pstNnieModel);
        SAMPLE_SVP_CHECK_EXPR_TRACE(HI_SUCCESS != s32StepRet, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,SAMPLE_COMM_SVP_NNIE_UnloadModel failed!\n");
        if (s32StepRet != HI_SUCCESS) {
            s32Ret = s32StepRet;
        }
    }
    return s32Ret;
}

/*
 * Tear down the parts detector: remove the forward task buffer, deinitialize
 * the NNIE parameters, software parameters and model, then free the
 * configuration block. A failed task-buffer removal is reported through the
 * check macro, and the cleanup below runs in either case.
 */
void CnnPartsDetectDestroy(SAMPLE_SVP_NNIE_CFG_S *self)
{
    HI_S32 s32Status = HI_MPI_SVP_NNIE_RemoveTskBuf(&(g_styolofastestXLNnieParam.astForwardCtrl[0].stTskBuf));

    /* The label sits directly below, so success and failure share the same cleanup. */
    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Status != HI_SUCCESS, YOLO_FASTEST_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,HI_MPI_SVP_NNIE_RemoveTskBuf failed!\n");

YOLO_FASTEST_FAIL_0:
    NNIE_PartsDetect_Deinit(&g_styolofastestXLNnieParam, &g_stYolofastestXLSoftwareParam, &g_styolofastestXLModel);
    free(self);
}

/*
 * function : fetch result
 *
 * Copies the detections whose score reaches f32PrintResultThresh from the
 * result blobs into resBuf.
 *
 * pstDstScore:    one raw score per ROI; divided by SAMPLE_SVP_NNIE_QUANT_BASE
 *                 here to obtain the float score.
 * pstDstRoi:      SAMPLE_SVP_NNIE_COORDI_NUM values per ROI, read as
 *                 xmin, ymin, xmax, ymax.
 * pstClassRoiNum: ROI count per class; its width must be 3 + 1 (three classes
 *                 plus background). Entry 0 is only used to skip its ROIs.
 * resBuf:         output array, zeroed on entry.
 * resSize:        capacity of resBuf, must be > 0.
 * resLen:         receives the number of entries written to resBuf.
 * f32PrintResultThresh: minimum score for a detection to be reported.
 *
 * Within a class, scanning stops at the first score below the threshold
 * (NOTE(review): this presumes scores are sorted in descending order per
 * class - confirm). Boxes with xmin >= xmax or ymin >= ymax are discarded.
 */
static void YoloFastestXLFetchRes(SVP_BLOB_S *pstDstScore, SVP_BLOB_S *pstDstRoi, SVP_BLOB_S *pstClassRoiNum,
    DetectObjInfo resBuf[], int resSize, int* resLen, HI_FLOAT f32PrintResultThresh)
{
    HI_U32 i;
    HI_U32 j;
    HI_U32 u32RoiNumBias = 0;
    HI_U32 u32ScoreBias;
    HI_U32 u32BboxBias;
    HI_FLOAT f32Score;
    HI_S32 s32XMin;
    HI_S32 s32YMin;
    HI_S32 s32XMax;
    HI_S32 s32YMax;
    HI_S32* ps32Score = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstDstScore->u64VirAddr);
    HI_S32* ps32Roi = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstDstRoi->u64VirAddr);
    HI_S32* ps32ClassRoiNum = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstClassRoiNum->u64VirAddr);
    HI_U32 u32ClassNum = pstClassRoiNum->unShape.stWhc.u32Width;
    HI_ASSERT(u32ClassNum == (3+1)); // 3: the number of class   1:background
    HI_ASSERT(resSize > 0);
    int resId = 0;
    *resLen = 0;
    (void)memset_s(resBuf, resSize * sizeof(resBuf[0]), 0x00, resSize * sizeof(resBuf[0]));

    /* Skip the ROIs that belong to entry 0. */
    u32RoiNumBias += ps32ClassRoiNum[0];
    for (i = 1; i < u32ClassNum; i++) {
        u32ScoreBias = u32RoiNumBias;
        u32BboxBias = u32RoiNumBias * SAMPLE_SVP_NNIE_COORDI_NUM;
        for (j = 0; j < (HI_U32)ps32ClassRoiNum[i]; j++) {
            f32Score = (HI_FLOAT)ps32Score[u32ScoreBias + j] / SAMPLE_SVP_NNIE_QUANT_BASE;
            if (f32Score < f32PrintResultThresh) {
                SAMPLE_PRT("f32Score:%.2f\n", f32Score);
                break;
            }
            if (resId >= resSize) {
                SAMPLE_PRT("YoloFastestXL resBuf full\n");
                break;
            }

            /* Validate the box before anything is written to the output slot. */
            s32XMin = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM];
            s32YMin = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + ARRAY_SUBSCRIPT_OFFSET_1];
            s32XMax = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + ARRAY_SUBSCRIPT_OFFSET_2];
            s32YMax = ps32Roi[u32BboxBias + j * SAMPLE_SVP_NNIE_COORDI_NUM + ARRAY_SUBSCRIPT_OFFSET_3];
            if (s32XMin >= s32XMax || s32YMin >= s32YMax) {
                SAMPLE_PRT("yolo1_orig: {%d, %d, %d, %d}, %f, discard for coord ERR\n",
                    s32XMin, s32YMin, s32XMax, s32YMax, f32Score);
                continue;
            }

            resBuf[resId].cls = i; // class
            resBuf[resId].score = f32Score; // score
            resBuf[resId].box.xmin = s32XMin;
            resBuf[resId].box.ymin = s32YMin;
            resBuf[resId].box.xmax = s32XMax;
            resBuf[resId].box.ymax = s32YMax;
            resId++;
        }
        u32RoiNumBias += ps32ClassRoiNum[i];
    }

    *resLen = resId;
}
/*
 * Trace the detections stored in the result blobs.
 *
 * pstDstScore:    one raw score per ROI; divided by SAMPLE_SVP_NNIE_QUANT_BASE
 *                 to obtain the float score.
 * pstDstRoi:      SAMPLE_SVP_NNIE_COORDI_NUM values per ROI, printed as
 *                 XMin, YMin, XMax, YMax.
 * pstClassRoiNum: ROI count per class; entry 0 is only used to skip its ROIs.
 * f32PrintResultThresh: within a class, printing stops at the first score
 *                 below this value.
 *
 * Always returns HI_SUCCESS.
 */
static HI_S32 SAMPLE_SVP_NNIE_Detection_PrintResult(SVP_BLOB_S *pstDstScore, SVP_BLOB_S *pstDstRoi,
    SVP_BLOB_S *pstClassRoiNum, HI_FLOAT f32PrintResultThresh)
{
    HI_S32 *ps32Score = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstDstScore->u64VirAddr);
    HI_S32 *ps32Roi = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstDstRoi->u64VirAddr);
    HI_S32 *ps32ClassRoiNum = SAMPLE_SVP_NNIE_CONVERT_64BIT_ADDR(HI_S32, pstClassRoiNum->u64VirAddr);
    HI_U32 u32ClassNum = pstClassRoiNum->unShape.stWhc.u32Width;
    HI_U32 u32RoiNumBias = 0;
    HI_U32 u32Class;

    /* ROIs of entry 0 come first in the blobs; start behind them. */
    u32RoiNumBias += ps32ClassRoiNum[0];

    for (u32Class = 1; u32Class < u32ClassNum; u32Class++) {
        HI_U32 u32RoiCnt = (HI_U32)ps32ClassRoiNum[u32Class];
        HI_U32 u32ScoreBias = u32RoiNumBias;
        HI_U32 u32BboxBias = u32RoiNumBias * SAMPLE_SVP_NNIE_COORDI_NUM;
        HI_U32 u32Roi;

        /* Announce the class only if its first score reaches the threshold and it has ROIs. */
        if ((HI_FLOAT)ps32Score[u32ScoreBias] / SAMPLE_SVP_NNIE_QUANT_BASE >= f32PrintResultThresh &&
            u32RoiCnt != 0) {
            SAMPLE_SVP_TRACE_INFO("==== The %dth class box info====\n", u32Class);
        }

        for (u32Roi = 0; u32Roi < u32RoiCnt; u32Roi++) {
            HI_U32 u32BoxIdx = u32BboxBias + u32Roi * SAMPLE_SVP_NNIE_COORDI_NUM;
            HI_FLOAT f32Score = (HI_FLOAT)ps32Score[u32ScoreBias + u32Roi] / SAMPLE_SVP_NNIE_QUANT_BASE;
            HI_S32 s32XMin;
            HI_S32 s32YMin;
            HI_S32 s32XMax;
            HI_S32 s32YMax;

            if (f32Score < f32PrintResultThresh) {
                break;
            }

            /* The four coordinates of one ROI are stored back to back. */
            s32XMin = ps32Roi[u32BoxIdx];
            s32YMin = ps32Roi[u32BoxIdx + 1];
            s32XMax = ps32Roi[u32BoxIdx + 2];
            s32YMax = ps32Roi[u32BoxIdx + 3];
            SAMPLE_SVP_TRACE_INFO("XMin:%d YMin:%d XMax:%d YMax:%d f32Score: %f\n", s32XMin, s32YMin, s32XMax, s32YMax, f32Score);
        }

        u32RoiNumBias += u32RoiCnt;
    }

    return HI_SUCCESS;
}
/*
 * function : run the YoloFastestXL parts detector on one image
 *
 * self:   configuration object; its pszPic field is reset to NULL.
 * img:    input image, forwarded to FillNnieByImg.
 * resBuf: output array for the detections.
 * resize: capacity of resBuf.
 * resLen: receives the number of detections written.
 *
 * Detections are traced and then copied to resBuf with a score threshold
 * of 0.8. Returns 0 on success, -1 when filling the input, the forward pass
 * or the result extraction fails.
 */
int YoloFastestCalImg(SAMPLE_SVP_NNIE_CFG_S* self,const IVE_IMAGE_S *img,DetectObjInfo resBuf[],int resize,int* resLen)
{
    const HI_FLOAT f32PrintResultThresh = 0.8f;
    /* Zero-initialized: segment 0 / node 0 for the input, segment 0 for processing. */
    SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S stInputDataIdx = {0};
    SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S stProcSegIdx = {0};
    HI_S32 s32Status;

    /* Step 1: fill the source data from the image. */
    self->pszPic = NULL;
    s32Status = FillNnieByImg(self, &g_styolofastestXLNnieParam, 0, 0, img);
    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Status != HI_SUCCESS, YOLO_FASTEST_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_FillSrcData failed!\n");

    /* Step 2: NNIE forward pass over the 0-th segment. */
    s32Status = SAMPLE_SVP_NNIE_Forward(&g_styolofastestXLNnieParam, &stInputDataIdx, &stProcSegIdx, HI_TRUE);
    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Status != HI_SUCCESS, YOLO_FASTEST_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_Forward failed!\n");

    /*
     * Step 3: software post-processing. If the network structure is changed,
     * make sure the inputs of SAMPLE_SVP_NNIE_YoloFastestXL_GetResult still match.
     */
    s32Status = SAMPLE_SVP_NNIE_YoloFastestXL_GetResult(&g_styolofastestXLNnieParam, &g_stYolofastestXLSoftwareParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Status != HI_SUCCESS, YOLO_FASTEST_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_YoloFastestXL_GetResult failed!\n");

    /* Step 4: trace the detections, then hand them to the caller. */
    (void)SAMPLE_SVP_NNIE_Detection_PrintResult(&g_stYolofastestXLSoftwareParam.stDstScore,
        &g_stYolofastestXLSoftwareParam.stDstRoi, &g_stYolofastestXLSoftwareParam.stClassRoiNum,
        f32PrintResultThresh);
    YoloFastestXLFetchRes(&g_stYolofastestXLSoftwareParam.stDstScore, &g_stYolofastestXLSoftwareParam.stDstRoi,
        &g_stYolofastestXLSoftwareParam.stClassRoiNum, resBuf, resize, resLen, f32PrintResultThresh);
    return 0;

YOLO_FASTEST_FAIL_0:
    return -1;
}
/*
 * function : YoloFastest software para init
 *
 * Fills pstSoftWareParam with the post-processing settings (input size, grid
 * sizes, anchor boxes, thresholds) and obtains one buffer from
 * SAMPLE_COMM_SVP_MallocCached that is split into four consecutive regions:
 *
 *   [GetResult scratch][ROI blob][score blob][per-class ROI count blob]
 *
 * pstCfg is not used by this function.
 *
 * Returns HI_SUCCESS on success. The error paths go through
 * SAMPLE_SVP_CHECK_EXPR_RET, which is handed HI_FAILURE when segment 0 does
 * not report SAMPLE_SVP_NNIE_YOLOFASTEST_REPORT_BLOB_NUM outputs,
 * HI_ERR_SVP_NNIE_ILLEGAL_PARAM when the scratch size is 0, and the
 * allocator's status when the allocation fails. The settings written before
 * a failing check stay in pstSoftWareParam.
 */
static HI_S32 SampleSvpNnieYoloFastestSoftwareInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg, SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
    SAMPLE_SVP_NNIE_YOLOFASTESTXL_SOFTWARE_PARAM_S *pstSoftWareParam)
{
    /* Widths and heights of the anchor (prior) boxes, one row per output node. */
    static const float s_af32Anchor[2][6] = {
        { 115, 73, 119, 199, 242, 238 },
        { 12, 18, 37, 49, 52, 132 },
    };
    HI_S32 s32Ret;
    HI_U32 u32ClassNum;
    HI_U32 u32TotalSize;
    HI_U32 u32DstRoiSize;
    HI_U32 u32DstScoreSize;
    HI_U32 u32ClassRoiNumSize;
    HI_U32 u32TmpBufTotalSize;
    HI_U32 u32Node;
    HI_U32 u32Idx;
    HI_U64 u64PhyAddr = 0;
    HI_U8 *pu8VirAddr = NULL;
    HI_U64 u64CurPhy;
    HI_U8 *pu8CurVir;

    /* The values of the following parameters are related to algorithm principles.
        For details, see related algorithms. */
    pstSoftWareParam->u32OriImHeight = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Height;
    pstSoftWareParam->u32OriImWidth = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Width;
    pstSoftWareParam->u32BboxNumEachGrid = 3;       /* number of anchor (prior) boxes per grid cell */
    pstSoftWareParam->u32ClassNum = 3;              /* number of classes */
    pstSoftWareParam->au32GridNumHeight[0] = 10;    /* grid rows the image is divided into, output node 0 */
    pstSoftWareParam->au32GridNumWidth[0] = 10;     /* grid columns, output node 0 */
    pstSoftWareParam->au32GridNumHeight[1] = 20;    /* grid rows, output node 1 */
    pstSoftWareParam->au32GridNumWidth[1] = 20;     /* grid columns, output node 1 */
    pstSoftWareParam->u32NmsThresh = (HI_U32)(0.3f * SAMPLE_SVP_NNIE_QUANT_BASE);   /* NMS threshold */
    pstSoftWareParam->u32ConfThresh = (HI_U32)(0.5f * SAMPLE_SVP_NNIE_QUANT_BASE);  /* confidence threshold */
    pstSoftWareParam->u32MaxRoiNum = 10;            /* at most 10 ROIs per image */
    for (u32Node = 0; u32Node < 2; u32Node++) {
        for (u32Idx = 0; u32Idx < 6; u32Idx++) {
            pstSoftWareParam->af32Bias[u32Node][u32Idx] = s_af32Anchor[u32Node][u32Idx];
        }
    }

    /* One extra class slot on top of the configured classes. */
    u32ClassNum = pstSoftWareParam->u32ClassNum + 1;

    /* astSeg[0].u16DstNum is the number of output nodes of network segment 0. */
    SAMPLE_SVP_CHECK_EXPR_RET(SAMPLE_SVP_NNIE_YOLOFASTEST_REPORT_BLOB_NUM != pstNnieParam->pstModel->astSeg[0].u16DstNum,
        HI_FAILURE, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,pstNnieParam->pstModel->astSeg[0].u16DstNum(%d) should be %d!\n",
        pstNnieParam->pstModel->astSeg[0].u16DstNum, SAMPLE_SVP_NNIE_YOLOFASTEST_REPORT_BLOB_NUM);

    /* Work out how much memory is needed: scratch buffer first ... */
    u32TmpBufTotalSize = SAMPLE_SVP_NNIE_YoloFastestXL_GetResultTmpBuf(pstNnieParam, pstSoftWareParam);
    SAMPLE_SVP_CHECK_EXPR_RET(u32TmpBufTotalSize == 0, HI_ERR_SVP_NNIE_ILLEGAL_PARAM, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error, SAMPLE_SVP_NNIE_YoloFastestXL_GetResultTmpBuf failed!\n");

    /*
     * ... then the three result blobs, each 16-byte aligned:
     *   ROI size       = class slots * max ROI * coordinates per ROI * element size
     *   score size     = class slots * max ROI * element size
     *   ROI count size = class slots * element size
     */
    u32DstRoiSize = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum * pstSoftWareParam->u32MaxRoiNum * sizeof(HI_U32) *
        SAMPLE_SVP_NNIE_COORDI_NUM);
    u32DstScoreSize = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum * pstSoftWareParam->u32MaxRoiNum * sizeof(HI_U32));
    u32ClassRoiNumSize = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum * sizeof(HI_U32));
    u32TotalSize = u32DstRoiSize + u32DstScoreSize + u32ClassRoiNumSize + u32TmpBufTotalSize;

    /* Allocate everything in one piece. */
    s32Ret = SAMPLE_COMM_SVP_MallocCached("SAMPLE_YOLOFASTEST_INIT", NULL, (HI_U64 *)&u64PhyAddr, (void **)&pu8VirAddr,
        u32TotalSize);
    SAMPLE_SVP_CHECK_EXPR_RET(s32Ret != HI_SUCCESS, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,Malloc memory failed!\n");

    /* Zero the new buffer and flush the cache for it. */
    (HI_VOID)memset_s(pu8VirAddr, u32TotalSize, 0, u32TotalSize);
    SAMPLE_COMM_SVP_FlushCache(u64PhyAddr, (void *)pu8VirAddr, u32TotalSize);

    /* Walk through the buffer with a physical/virtual cursor pair. */
    u64CurPhy = u64PhyAddr;
    pu8CurVir = pu8VirAddr;

    /* Region 1: scratch buffer for GetResult, at the very start. */
    pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr = u64CurPhy;
    pstSoftWareParam->stGetResultTmpBuf.u64VirAddr = SAMPLE_SVP_NNIE_CONVERT_PTR_TO_ADDR(HI_U64, pu8CurVir);
    u64CurPhy += u32TmpBufTotalSize;
    pu8CurVir += u32TmpBufTotalSize;

    /*
     * Region 2: ROI blob. See the Bbox diagram of NNIE_ForwardWithBbox in the
     * HiSVP development guide: width equals class num * 4, height equals
     * astBbox[i]'s height. The original note gives the layout order as
     * "x y w h x1 y1 w1 h1".
     * NOTE(review): YoloFastestXLFetchRes reads the four values as
     * xmin, ymin, xmax, ymax - confirm which description is right.
     */
    pstSoftWareParam->stDstRoi.enType = SVP_BLOB_TYPE_S32;
    pstSoftWareParam->stDstRoi.u64PhyAddr = u64CurPhy;
    pstSoftWareParam->stDstRoi.u64VirAddr = SAMPLE_SVP_NNIE_CONVERT_PTR_TO_ADDR(HI_U64, pu8CurVir);
    pstSoftWareParam->stDstRoi.u32Stride = u32DstRoiSize;
    pstSoftWareParam->stDstRoi.u32Num = 1;
    pstSoftWareParam->stDstRoi.unShape.stWhc.u32Chn = 1;
    pstSoftWareParam->stDstRoi.unShape.stWhc.u32Height = 1;
    pstSoftWareParam->stDstRoi.unShape.stWhc.u32Width =
        u32ClassNum * pstSoftWareParam->u32MaxRoiNum * SAMPLE_SVP_NNIE_COORDI_NUM;
    u64CurPhy += u32DstRoiSize;
    pu8CurVir += u32DstRoiSize;

    /*
     * Region 3: score blob. See the Score diagram of NNIE_ForwardWithBbox in
     * the HiSVP development guide: width equals class number, height equals
     * pstRoi's height.
     */
    pstSoftWareParam->stDstScore.enType = SVP_BLOB_TYPE_S32;
    pstSoftWareParam->stDstScore.u64PhyAddr = u64CurPhy;
    pstSoftWareParam->stDstScore.u64VirAddr = SAMPLE_SVP_NNIE_CONVERT_PTR_TO_ADDR(HI_U64, pu8CurVir);
    pstSoftWareParam->stDstScore.u32Stride = u32DstScoreSize;
    pstSoftWareParam->stDstScore.u32Num = 1;
    pstSoftWareParam->stDstScore.unShape.stWhc.u32Chn = 1;
    pstSoftWareParam->stDstScore.unShape.stWhc.u32Height = 1;
    pstSoftWareParam->stDstScore.unShape.stWhc.u32Width = u32ClassNum * pstSoftWareParam->u32MaxRoiNum;
    u64CurPhy += u32DstScoreSize;
    pu8CurVir += u32DstScoreSize;

    /* Region 4: per-class ROI count blob; its width is the number of class slots. */
    pstSoftWareParam->stClassRoiNum.enType = SVP_BLOB_TYPE_S32;
    pstSoftWareParam->stClassRoiNum.u64PhyAddr = u64CurPhy;
    pstSoftWareParam->stClassRoiNum.u64VirAddr = SAMPLE_SVP_NNIE_CONVERT_PTR_TO_ADDR(HI_U64, pu8CurVir);
    pstSoftWareParam->stClassRoiNum.u32Stride = u32ClassRoiNumSize;
    pstSoftWareParam->stClassRoiNum.u32Num = 1;
    pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Chn = 1;
    pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Height = 1;
    pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Width = u32ClassNum;

    return HI_SUCCESS;
}
/******************************************************************************
* function : NNIE parameter initialization (parts detector)
*
* Initializes the hardware parameters and then the YoloFastest software
* parameters. If either step fails, both are deinitialized again (the model
* is left alone) before an error is returned.
*
* Returns HI_SUCCESS on success. On failure returns HI_FAILURE, unless the
* rollback itself fails, in which case SAMPLE_SVP_CHECK_EXPR_RET is handed the
* rollback status as its return value.
******************************************************************************/
static HI_S32 SampleSvpNniePartsDetectParamInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_YOLOFASTESTXL_SOFTWARE_PARAM_S* pstSoftWareParam)
{
    HI_S32 s32Status;

    /* Step 1: init hardware para */
    s32Status = SAMPLE_COMM_SVP_NNIE_ParamInit(pstCfg, pstNnieParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Status != HI_SUCCESS, INIT_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_COMM_SVP_NNIE_ParamInit failed!\n", s32Status);

    /* Step 2: init software para */
    s32Status = SampleSvpNnieYoloFastestSoftwareInit(pstCfg, pstNnieParam, pstSoftWareParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(s32Status != HI_SUCCESS, INIT_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_SVP_NNIE_PartsDetect_SoftwareInit failed!\n", s32Status);

    /* Both steps reported HI_SUCCESS. */
    return HI_SUCCESS;

INIT_FAIL_0:
    /* Roll back whatever the failed step left behind. */
    s32Status = NNIE_PartsDetect_Deinit(pstNnieParam, pstSoftWareParam, NULL);
    SAMPLE_SVP_CHECK_EXPR_RET(s32Status != HI_SUCCESS, s32Status, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error(%#x),SAMPLE_SVP_NNIE_PartsDetect_Deinit failed!\n", s32Status);
    return HI_FAILURE;
}

/*
 * Create the parts detector (YoloFastestXL) from an NNIE model file.
 *
 * model:     out-parameter; receives the heap-allocated configuration on
 *            success and is set to NULL on failure.
 * modelFile: path handed to SAMPLE_COMM_SVP_NNIE_LoadModel.
 *
 * Returns 0 on success and -1 on failure. The model, NNIE parameter and
 * software parameter state are kept in g_styolofastestXLModel,
 * g_styolofastestXLNnieParam and g_stYolofastestXLSoftwareParam; only the
 * configuration block is allocated here and handed to the caller.
 */
int PartsDetectCreate(SAMPLE_SVP_NNIE_CFG_S **model, const char* modelFile)
{
    SAMPLE_SVP_NNIE_CFG_S *self;
    HI_U32 u32PicNum = 1;
    HI_S32 s32Ret;

    /* Allocation problems are reported as an ordinary failure instead of relying on an assert. */
    self = (SAMPLE_SVP_NNIE_CFG_S*)malloc(sizeof(*self));
    SAMPLE_SVP_CHECK_EXPR_GOTO(self == NULL, YOLO_FASTEST_FAIL_1, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,malloc PartsDetect cfg failed!\n");
    SAMPLE_SVP_CHECK_EXPR_GOTO(memset_s(self, sizeof(*self), 0x00, sizeof(*self)) != EOK, YOLO_FASTEST_FAIL_1,
        SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,memset_s PartsDetect cfg failed!\n");

    // Set configuration parameter
    self->pszPic = NULL;
    self->u32MaxInputNum = u32PicNum; // max input image num in each batch
    self->u32MaxRoiNum = 0;
    self->aenNnieCoreId[0] = SVP_NNIE_ID_0; // set NNIE core

    // NOTE(review): no sys init (SAMPLE_COMM_SVP_CheckSysInit) is performed here;
    // presumably the caller has already done it - confirm.

    // CNN Load model
    SAMPLE_SVP_TRACE_INFO("PartsDetect Load model!\n");
    s32Ret = SAMPLE_COMM_SVP_NNIE_LoadModel((char*)modelFile, &g_styolofastestXLModel);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, YOLO_FASTEST_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_COMM_SVP_NNIE_LoadModel failed!\n");

    // CNN parameter initialization
    SAMPLE_SVP_TRACE_INFO("PartsDetect parameter initialization!\n");
    g_styolofastestXLNnieParam.pstModel = &g_styolofastestXLModel.stModel;

    s32Ret = SampleSvpNniePartsDetectParamInit(self, &g_styolofastestXLNnieParam, &g_stYolofastestXLSoftwareParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, YOLO_FASTEST_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,SAMPLE_SVP_NNIE_Cnn_ParamInit failed!\n");

    // record tskBuf
    s32Ret = HI_MPI_SVP_NNIE_AddTskBuf(&(g_styolofastestXLNnieParam.astForwardCtrl[0].stTskBuf));
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, YOLO_FASTEST_FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
        "Error,HI_MPI_SVP_NNIE_AddTskBuf failed!\n");
    *model = self;
    return 0;

YOLO_FASTEST_FAIL_0:
    NNIE_PartsDetect_Deinit(&g_styolofastestXLNnieParam, &g_stYolofastestXLSoftwareParam, &g_styolofastestXLModel);
YOLO_FASTEST_FAIL_1:
    /* The configuration block is never handed out on failure, so release it here. */
    free(self);
    *model = NULL;
    return -1;
}

#ifdef __cplusplus
#if __cplusplus
}
#endif
#endif /* End of #ifdef __cplusplus */
